diff options
author | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-01 22:55:37 +0000 |
---|---|---|
committer | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-03 18:22:05 -0600 |
commit | 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch) | |
tree | 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/tr | |
download | userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip |
initial population
Diffstat (limited to 'usr.bin/tr')
-rw-r--r-- | usr.bin/tr/extern.h | 43 | ||||
-rw-r--r-- | usr.bin/tr/str.c | 453 | ||||
-rw-r--r-- | usr.bin/tr/tr.1 | 352 | ||||
-rw-r--r-- | usr.bin/tr/tr.c | 283 |
4 files changed, 1131 insertions, 0 deletions
diff --git a/usr.bin/tr/extern.h b/usr.bin/tr/extern.h new file mode 100644 index 0000000..8cb2de9 --- /dev/null +++ b/usr.bin/tr/extern.h @@ -0,0 +1,43 @@ +/* $NetBSD: extern.h,v 1.11 2013/08/11 00:39:22 dholland Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +struct str; +typedef struct str STR; + +#include <limits.h> +#define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */ +#define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */ + +STR *str_create(int, const char *); +void str_destroy(STR *); +int next(STR *, int *); diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c new file mode 100644 index 0000000..a9dccde --- /dev/null +++ b/usr.bin/tr/str.c @@ -0,0 +1,453 @@ +/* $NetBSD: str.c,v 1.30 2018/05/26 11:20:30 leot Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#ifndef lint +#if 0 +static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95"; +#endif +__RCSID("$NetBSD: str.c,v 1.30 2018/05/26 11:20:30 leot Exp $"); +#endif /* not lint */ + +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> + +#include "extern.h" + +struct str { + enum { STRING1, STRING2 } which; + enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; + int cnt; /* character count */ + int lastch; /* last character */ + int equiv[2]; /* equivalence set */ + int *set; /* set of characters */ + const char *str; /* user's string */ +}; + +static int backslash(STR *); +static int bracket(STR *); +static int c_class(const void *, const void *); +static int *genclass(const char *, size_t); +static void genequiv(STR *); +static int genrange(STR *); +static void genseq(STR *); + +STR * +str_create(int whichstring, const char *txt) +{ + STR *s; + + s = malloc(sizeof(*s)); + if (s == NULL) { + err(1, "Out of memory"); + } + + s->which = whichstring == 2 ? 
STRING2 : STRING1; + s->state = NORMAL; + s->cnt = 0; + s->lastch = OOBCH; + s->equiv[0] = 0; + s->equiv[1] = OOBCH; + s->set = NULL; + s->str = txt; + + return s; +} + +void +str_destroy(STR *s) +{ + if (s->set != NULL && s->set != s->equiv) { + free(s->set); + } + free(s); +} + +int +next(STR *s, int *ret) +{ + int ch; + + switch (s->state) { + case EOS: + *ret = s->lastch; + return 0; + case INFINITE: + *ret = s->lastch; + return 1; + case NORMAL: + ch = (unsigned char)s->str[0]; + switch (ch) { + case '\0': + s->state = EOS; + *ret = s->lastch; + return 0; + case '\\': + s->lastch = backslash(s); + break; + case '[': + if (bracket(s)) { + return next(s, ret); + } + /* FALLTHROUGH */ + default: + ++s->str; + s->lastch = ch; + break; + } + + /* We can start a range at any time. */ + if (s->str[0] == '-' && genrange(s)) { + return next(s, ret); + } + *ret = s->lastch; + return 1; + case RANGE: + if (s->cnt == 0) { + s->state = NORMAL; + return next(s, ret); + } + s->cnt--; + ++s->lastch; + *ret = s->lastch; + return 1; + case SEQUENCE: + if (s->cnt == 0) { + s->state = NORMAL; + return next(s, ret); + } + s->cnt--; + *ret = s->lastch; + return 1; + case SET: + s->lastch = s->set[s->cnt++]; + if (s->lastch == OOBCH) { + s->state = NORMAL; + if (s->set != s->equiv) { + free(s->set); + } + s->set = NULL; + return next(s, ret); + } + *ret = s->lastch; + return 1; + } + /* NOTREACHED */ + assert(0); + *ret = s->lastch; + return 0; +} + +static int +bracket(STR *s) +{ + const char *p; + int *q; + + switch (s->str[1]) { + case ':': /* "[:class:]" */ + if ((p = strstr(s->str + 2, ":]")) == NULL) + return 0; + s->str += 2; + q = genclass(s->str, p - s->str); + s->state = SET; + s->set = q; + s->cnt = 0; + s->str = p + 2; + return 1; + case '=': /* "[=equiv=]" */ + if ((p = strstr(s->str + 2, "=]")) == NULL) + return 0; + s->str += 2; + genequiv(s); + s->str = p + 2; + return 1; + default: /* "[\###*n]" or "[#*n]" */ + if ((p = strpbrk(s->str + 2, "*]")) == NULL) + return 
0; + if (p[0] != '*' || strchr(p, ']') == NULL) + return 0; + s->str += 1; + genseq(s); + return 1; + } + /* NOTREACHED */ +} + +typedef struct { + const char *name; + int (*func)(int); +} CLASS; + +static const CLASS classes[] = { + { "alnum", isalnum }, + { "alpha", isalpha }, + { "blank", isblank }, + { "cntrl", iscntrl }, + { "digit", isdigit }, + { "graph", isgraph }, + { "lower", islower }, + { "print", isprint }, + { "punct", ispunct }, + { "space", isspace }, + { "upper", isupper }, + { "xdigit", isxdigit }, +}; + +typedef struct { + const char *name; + size_t len; +} CLASSKEY; + +static int * +genclass(const char *class, size_t len) +{ + int ch; + const CLASS *cp; + CLASSKEY key; + int *p; + unsigned pos, num; + + /* Find the class */ + key.name = class; + key.len = len; + cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]), + c_class); + if (cp == NULL) { + errx(1, "unknown class %.*s", (int)len, class); + } + + /* + * Figure out what characters are in the class + */ + + num = NCHARS + 1; + p = malloc(num * sizeof(*p)); + if (p == NULL) { + err(1, "malloc"); + } + + pos = 0; + for (ch = 0; ch < NCHARS; ch++) { + if (cp->func(ch)) { + p[pos++] = ch; + } + } + + p[pos++] = OOBCH; + for (; pos < num; pos++) { + p[pos] = 0; + } + + return p; +} + +static int +c_class(const void *av, const void *bv) +{ + const CLASSKEY *a = av; + const CLASS *b = bv; + size_t blen; + int r; + + blen = strlen(b->name); + r = strncmp(a->name, b->name, a->len); + if (r != 0) { + return r; + } + if (a->len < blen) { + /* someone gave us a prefix of the right name */ + return -1; + } + assert(a-> len == blen); + return 0; +} + +/* + * English doesn't have any equivalence classes, so for now + * we just syntax check and grab the character. 
+ */ +static void +genequiv(STR *s) +{ + int ch; + + ch = (unsigned char)s->str[0]; + if (ch == '\\') { + s->equiv[0] = backslash(s); + } else { + s->equiv[0] = ch; + s->str++; + } + if (s->str[0] != '=') { + errx(1, "Misplaced equivalence equals sign"); + } + s->str++; + if (s->str[0] != ']') { + errx(1, "Misplaced equivalence right bracket"); + } + s->str++; + + s->cnt = 0; + s->state = SET; + s->set = s->equiv; +} + +static int +genrange(STR *s) +{ + int stopval; + const char *savestart; + + savestart = s->str++; + stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++; + if (stopval < (unsigned char)s->lastch) { + s->str = savestart; + return 0; + } + s->cnt = stopval - s->lastch + 1; + s->state = RANGE; + --s->lastch; + return 1; +} + +static void +genseq(STR *s) +{ + char *ep; + + if (s->which == STRING1) { + errx(1, "Sequences only valid in string2"); + } + + if (*s->str == '\\') { + s->lastch = backslash(s); + } else { + s->lastch = (unsigned char)*s->str++; + } + if (*s->str != '*') { + errx(1, "Misplaced sequence asterisk"); + } + + s->str++; + switch (s->str[0]) { + case '\\': + s->cnt = backslash(s); + break; + case ']': + s->cnt = 0; + ++s->str; + break; + default: + if (isdigit((unsigned char)s->str[0])) { + s->cnt = strtol(s->str, &ep, 0); + if (*ep == ']') { + s->str = ep + 1; + break; + } + } + errx(1, "illegal sequence count"); + /* NOTREACHED */ + } + + s->state = s->cnt ? SEQUENCE : INFINITE; +} + +/* + * Translate \??? into a character. Up to 3 octal digits, if no digits either + * an escape code or a literal character. + */ +static int +backslash(STR *s) +{ + int ch, cnt, val; + + cnt = val = 0; + for (;;) { + /* Consume the character we're already on. */ + s->str++; + + /* Look at the next character. 
*/ + ch = (unsigned char)s->str[0]; + if (!isascii(ch) || !isdigit(ch)) { + break; + } + val = val * 8 + ch - '0'; + if (++cnt == 3) { + /* Enough digits; consume this one and stop */ + ++s->str; + break; + } + } + if (cnt) { + /* We saw digits, so return their value */ + if (val >= OOBCH) + errx(1, "Invalid octal character value"); + return val; + } + if (ch == '\0') { + /* \<end> -> \ */ + s->state = EOS; + return '\\'; + } + + /* Consume the escaped character */ + s->str++; + + switch (ch) { + case 'a': /* escape characters */ + return '\7'; + case 'b': + return '\b'; + case 'e': + return '\033'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\13'; + default: /* \q -> q */ + return ch; + } +} diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1 new file mode 100644 index 0000000..fa9d121 --- /dev/null +++ b/usr.bin/tr/tr.1 @@ -0,0 +1,352 @@ +.\" $NetBSD: tr.1,v 1.22 2017/07/03 21:34:22 wiz Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)tr.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd May 29, 2013 +.Dt TR 1 +.Os +.Sh NAME +.Nm tr +.Nd translate characters +.Sh SYNOPSIS +.Nm +.Op Fl cs +.Ar string1 string2 +.Nm +.Op Fl c +.Fl d +.Ar string1 +.Nm +.Op Fl c +.Fl s +.Ar string1 +.Nm +.Op Fl c +.Fl ds +.Ar string1 string2 +.Sh DESCRIPTION +The +.Nm +utility copies the standard input to the standard output with substitution +or deletion of selected characters. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +Complements the set of characters in +.Ar string1 ; +that is, +.Fl c Ar \&ab +includes every character except for +.Sq a +and +.Sq b . +.It Fl d +The +.Fl d +option causes characters to be deleted from the input. +.It Fl s +The +.Fl s +option squeezes multiple occurrences of the characters listed in the last +operand (either +.Ar string1 +or +.Ar string2 ) +in the input into a single instance of the character. +This occurs after all deletion and translation is completed. 
+.El +.Pp +In the first synopsis form, the characters in +.Ar string1 +are translated into the characters in +.Ar string2 +where the first character in +.Ar string1 +is translated into the first character in +.Ar string2 +and so on. +If +.Ar string1 +is longer than +.Ar string2 , +the last character found in +.Ar string2 +is duplicated until +.Ar string1 +is exhausted. +.Pp +In the second synopsis form, the characters in +.Ar string1 +are deleted from the input. +.Pp +In the third synopsis form, the characters in +.Ar string1 +are compressed as described for the +.Fl s +option. +.Pp +In the fourth synopsis form, the characters in +.Ar string1 +are deleted from the input, and the characters in +.Ar string2 +are compressed as described for the +.Fl s +option. +.Pp +The following conventions can be used in +.Ar string1 +and +.Ar string2 +to specify sets of characters: +.Bl -tag -width [:equiv:] +.It character +Any character not described by one of the following conventions +represents itself. +.It \eoctal +A backslash followed by 1, 2 or 3 octal digits represents a character +with that encoded value. +To follow an octal sequence with a digit as a character, left zero-pad +the octal sequence to the full 3 octal digits. +.It \echaracter +A backslash followed by certain special characters maps to special +values. +.sp +.Bl -column cc +.It \ea <alert character> +.It \eb <backspace> +.It \ef <form-feed> +.It \en <newline> +.It \er <carriage return> +.It \et <tab> +.It \ev <vertical tab> +.El +.sp +A backslash followed by any other character maps to that character. +.It c-c +Represents the range of characters between the range endpoints, inclusively. +.It [:class:] +Represents all characters belonging to the defined character class. 
+Class names are: +.sp +.Bl -column xdigit +.It alnum <alphanumeric characters> +.It alpha <alphabetic characters> +.It blank <blank characters> +.It cntrl <control characters> +.It digit <numeric characters> +.It graph <graphic characters> +.It lower <lower-case alphabetic characters> +.It print <printable characters> +.It punct <punctuation characters> +.It space <space characters> +.It upper <upper-case characters> +.It xdigit <hexadecimal characters> +.El +.Pp +.\" All classes may be used in +.\" .Ar string1 , +.\" and in +.\" .Ar string2 +.\" when both the +.\" .Fl d +.\" and +.\" .Fl s +.\" options are specified. +.\" Otherwise, only the classes ``upper'' and ``lower'' may be used in +.\" .Ar string2 +.\" and then only when the corresponding class (``upper'' for ``lower'' +.\" and vice-versa) is specified in the same relative position in +.\" .Ar string1 . +.\" .Pp +With the exception of the +.Dq upper +and +.Dq lower +classes, characters in the classes are in unspecified order. +In the +.Dq upper +and +.Dq lower +classes, characters are entered in ascending order. +.Pp +For specific information as to which ASCII characters are included +in these classes, see +.Xr ctype 3 +and related manual pages. +.It [=equiv=] +Represents all characters or collating (sorting) elements belonging to +the same equivalence class as +.Ar equiv . +If there is a secondary ordering within the equivalence class, the +characters are ordered in ascending sequence. +Otherwise, they are ordered after their encoded values. +An example of an equivalence class might be +.Dq \&c +and +.Dq \&ch +in Spanish; +English has no equivalence classes. +.It [#*n] +Represents +.Ar n +repeated occurrences of the character represented by +.Ar # . +This +expression is only valid when it occurs in +.Ar string2 . +If +.Ar n +is omitted or is zero, it is interpreted as large enough to extend the +.Ar string2 +sequence to the length of +.Ar string1 . 
+If +.Ar n +has a leading zero, it is interpreted as an octal value; +otherwise, it is interpreted as a decimal value. +.El +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +The following examples are shown as given to the shell: +.Pp +Create a list of the words in +.Ar file1 , +one per line, where a word is taken to be a maximal string of letters: +.sp +.D1 Li "tr -cs \*q[:alpha:]\*q \*q\en\*q < file1" +.sp +Translate the contents of +.Ar file1 +to upper-case: +.sp +.D1 Li "tr \*q[:lower:]\*q \*q[:upper:]\*q < file1" +.sp +Strip out non-printable characters from +.Ar file1 : +.sp +.D1 Li "tr -cd \*q[:print:]\*q < file1" +.Sh COMPATIBILITY +.At V +has historically implemented character ranges using the syntax +.Dq [c-c] +instead of the +.Dq c-c +used by historic +.Bx +implementations and standardized by POSIX. +.At V +shell scripts should work under this implementation as long as +the range is intended to map in another range, i.e. the command +.Pp +.Ic "tr [a-z] [A-Z]" +.Pp +will work as it will map the +.Sq \&[ +character in +.Ar string1 +to the +.Sq \&[ +character in +.Ar string2 . +However, if the shell script is deleting or squeezing characters as in +the command +.Pp +.Ic "tr -d [a-z]" +.Pp +the characters +.Sq \&[ +and +.Sq \&] +will be included in the deletion or compression list which would +not have happened under an historic +.At V +implementation. +Additionally, any scripts that depended on the sequence +.Dq a-z +to represent the three characters +.Sq \&a , +.Sq \&- , +and +.Sq \&z +will have to be rewritten as +.Dq a\e-z . +.Pp +The +.Nm +utility has historically not permitted the manipulation of NUL bytes in +its input and, additionally, stripped NUL's from its input stream. +This implementation has removed this behavior as a bug. +.Pp +The +.Nm +utility has historically been extremely forgiving of syntax errors, +for example, the +.Fl c +and +.Fl s +options were ignored unless two strings were specified. +This implementation will not permit illegal syntax. 
+.Sh SEE ALSO +.Xr dd 1 , +.Xr sed 1 +.Sh STANDARDS +The +.Nm +utility is expected to be +.St -p1003.2 +compatible. +It should be noted that the feature wherein the last character of +.Ar string2 +is duplicated if +.Ar string2 +has fewer characters than +.Ar string1 +is permitted by POSIX but is not required. +Shell scripts attempting to be portable to other POSIX systems should use +the +.Dq [#*n] +convention instead of relying on this behavior. +.Sh BUGS +.Nm +was originally designed to work with +.Tn US-ASCII . +Its use with character sets that do not share all the properties of +.Tn US-ASCII , +e.g., a symmetric set of upper and lower case characters +that can be algorithmically converted one to the other, +may yield unpredictable results. +.Pp +.Nm +should be internationalized. diff --git a/usr.bin/tr/tr.c b/usr.bin/tr/tr.c new file mode 100644 index 0000000..8221880 --- /dev/null +++ b/usr.bin/tr/tr.c @@ -0,0 +1,283 @@ +/* $NetBSD: tr.c,v 1.20 2013/08/11 01:54:35 dholland Exp $ */ + +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1988, 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; +#endif +__RCSID("$NetBSD: tr.c,v 1.20 2013/08/11 01:54:35 dholland Exp $"); +#endif /* not lint */ + +#include <sys/types.h> + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +static int string1[NCHARS], string2[NCHARS]; + +static void setup(int *, const char *, int, int); +__dead static void usage(void); + +int +main(int argc, char **argv) +{ + int ch, ch2, lastch; + int cflag, dflag, sflag, isstring2; + STR *s1, *s2; + + cflag = dflag = sflag = 0; + while ((ch = getopt(argc, argv, "cds")) != -1) + switch (ch) { + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + case 's': + sflag = 1; + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + switch(argc) { + case 0: + default: + usage(); + /* NOTREACHED */ + case 1: + isstring2 = 0; + break; + case 2: + isstring2 = 1; + break; + } + + /* + * tr -ds [-c] 
string1 string2 + * Delete all characters (or complemented characters) in string1. + * Squeeze all characters in string2. + */ + if (dflag && sflag) { + if (!isstring2) + usage(); + + setup(string1, argv[0], 1, cflag); + setup(string2, argv[1], 2, 0); + + for (lastch = OOBCH; (ch = getchar()) != EOF; ) + if (!string1[ch] && (!string2[ch] || lastch != ch)) { + lastch = ch; + (void)putchar(ch); + } + exit(0); + } + + /* + * tr -d [-c] string1 + * Delete all characters (or complemented characters) in string1. + */ + if (dflag) { + if (isstring2) + usage(); + + setup(string1, argv[0], 1, cflag); + + while ((ch = getchar()) != EOF) + if (!string1[ch]) + (void)putchar(ch); + exit(0); + } + + /* + * tr -s [-c] string1 + * Squeeze all characters (or complemented characters) in string1. + */ + if (sflag && !isstring2) { + setup(string1, argv[0], 1, cflag); + + for (lastch = OOBCH; (ch = getchar()) != EOF;) + if (!string1[ch] || lastch != ch) { + lastch = ch; + (void)putchar(ch); + } + exit(0); + } + + /* + * tr [-cs] string1 string2 + * Replace all characters (or complemented characters) in string1 with + * the character in the same position in string2. If the -s option is + * specified, squeeze all the characters in string2. + */ + if (!isstring2) + usage(); + + /* + * The first and second strings need to be matched up. This + * means that if we are doing -c, we need to scan the first + * string in advance, complement it, and match *that* against + * the second string; otherwise we need to scan them together. + */ + + if (cflag) { + /* + * Scan string 1 and complement it. After this, + * string1[] contains 0 for chars to leave alone and 1 + * for chars to translate. + */ + setup(string1, argv[0], 1, cflag); + s1 = NULL; /* for safety */ + /* we will use ch to iterate over string1, so start it */ + ch = -1; + } else { + /* Create the scanner for string 1. 
*/ + s1 = str_create(1, argv[0]); + for (ch = 0; ch < NCHARS; ch++) { + string1[ch] = ch; + } + } + /* Create the scanner for string 2. */ + s2 = str_create(2, argv[1]); + + /* Read the first char of string 2 first to make sure there is one. */ + if (!next(s2, &ch2)) + errx(1, "empty string2"); + + /* + * Loop over the chars from string 1. After this loop string1[] + * is a mapping from input to output chars. + */ + while (1) { + if (cflag) { + /* + * Try each character in order. For characters we + * skip over because we aren't translating them, + * set the translation to the identity. + */ + ch++; + while (ch < NCHARS && string1[ch] == 0) { + if (string1[ch] == 0) { + string1[ch] = ch; + } + ch++; + } + if (ch == NCHARS) { + break; + } + } + else { + /* Get the next character from string 1. */ + if (!next(s1, &ch)) { + break; + } + } + + /* Set the translation to the character from string 2. */ + string1[ch] = ch2; + + /* Note the characters to squeeze in string2[]. */ + if (sflag) { + string2[ch2] = 1; + } + + /* + * Get the next character from string 2. If it runs + * out, this will keep returning the last character + * over and over again. + */ + (void)next(s2, &ch2); + } + + /* + * Now do it. + */ + + if (sflag) + for (lastch = OOBCH; (ch = getchar()) != EOF;) { + ch = string1[ch]; + if (!string2[ch] || lastch != ch) { + lastch = ch; + (void)putchar(ch); + } + } + else + while ((ch = getchar()) != EOF) + (void)putchar(string1[ch]); + + /* Clean up and exit. 
*/ + if (s1 != NULL) { + str_destroy(s1); + } + str_destroy(s2); + exit (0); +} + +static void +setup(int *string, const char *arg, int whichstring, int cflag) +{ + int cnt, *p; + int ch; + STR *str; + + str = str_create(whichstring, arg); + while (next(str, &ch)) + string[ch] = 1; + if (cflag) + for (p = string, cnt = NCHARS; cnt--; ++p) + *p = !*p; + str_destroy(str); +} + +static void +usage(void) +{ + (void)fprintf(stderr, "usage: tr [-cs] string1 string2\n"); + (void)fprintf(stderr, " tr [-c] -d string1\n"); + (void)fprintf(stderr, " tr [-c] -s string1\n"); + (void)fprintf(stderr, " tr [-c] -ds string1 string2\n"); + exit(1); +} |