From 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe Mon Sep 17 00:00:00 2001 From: Kiyoshi Aman Date: Fri, 1 Feb 2019 22:55:37 +0000 Subject: initial population --- usr.bin/split/split.1 | 132 ++++++++++++++++++ usr.bin/split/split.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 494 insertions(+) create mode 100644 usr.bin/split/split.1 create mode 100644 usr.bin/split/split.c (limited to 'usr.bin/split') diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1 new file mode 100644 index 0000000..a25e3c6 --- /dev/null +++ b/usr.bin/split/split.1 @@ -0,0 +1,132 @@ +.\" $NetBSD: split.1,v 1.15 2007/05/31 01:35:35 jschauma Exp $ +.\" +.\" Copyright (c) 1990, 1991, 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)split.1 8.3 (Berkeley) 4/16/94 +.\" +.Dd May 28, 2007 +.Dt SPLIT 1 +.Os +.Sh NAME +.Nm split +.Nd split a file into pieces +.Sh SYNOPSIS +.Nm +.Op Fl a Ar suffix_length +.Oo +.Fl b Ar byte_count Ns Oo Li k|m Oc | +.Fl l Ar line_count | +.Fl n Ar chunk_count +.Oc +.Op Ar file Op Ar name +.Sh DESCRIPTION +The +.Nm +utility reads the given +.Ar file +and breaks it up into files of 1000 lines each. +If +.Ar file +is a single dash or absent, +.Nm +reads from the standard input. +.Ar file +itself is not altered. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Use +.Ar suffix_length +letters to form the suffix of the file name. +.It Fl b +Create smaller files +.Ar byte_count +bytes in length. +If +.Ql k +is appended to the number, the file is split into +.Ar byte_count +kilobyte pieces. +If +.Ql m +is appended to the number, the file is split into +.Ar byte_count +megabyte pieces. +.It Fl l +Create smaller files +.Ar line_count +lines in length. +.It Fl n +Split file into +.Ar chunk_count +smaller files. +.El +.Pp +If additional arguments are specified, the first is used as the name +of the input file which is to be split. +If a second additional argument is specified, it is used as a prefix +for the names of the files into which the file is split. 
+In this case, each file into which the file is split is named by the +prefix followed by a lexically ordered suffix using +.Ar suffix_length +characters in the range +.Dq Li a-z . +If +.Fl a +is not specified, two letters are used as the suffix. +.Pp +If the +.Ar name +argument is not specified, +.Ql x +is used. +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.1-2001 . +.Sh HISTORY +A +.Nm +command appeared in +.At v6 . +.Pp +The +.Fl a +option was introduced in +.Nx 2.0 . +Before that, if +.Ar name +was not specified, +.Nm +would vary the first letter of the filename +to increase the number of possible output files. +The +.Fl a +option makes this unnecessary. diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c new file mode 100644 index 0000000..e538da1 --- /dev/null +++ b/usr.bin/split/split.c @@ -0,0 +1,362 @@ +/* $NetBSD: split.c,v 1.27 2017/01/10 21:14:13 christos Exp $ */ + +/* + * Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#ifndef lint
+__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
+ The Regents of the University of California. All rights reserved.");
+#endif /* not lint */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
+#endif
+__RCSID("$NetBSD: split.c,v 1.27 2017/01/10 21:14:13 christos Exp $");
+#endif /* not lint */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DEFLINE	1000		/* Default num lines per file. */
+
+static int file_open;		/* If a file open. */
+static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */
+static char *fname;		/* File name prefix. */
+static size_t sfxlen = 2;	/* suffix length. */
+
+static void newfile(void);
+static void split1(off_t, int) __dead;
+static void split2(off_t) __dead;
+static void split3(off_t) __dead;
+static void usage(void) __dead;
+static size_t bigwrite(int, void const *, size_t);
+
+/*
+ * main --
+ *	Parse the options, open the input (standard input when the file
+ *	operand is "-" or absent), build the output name buffer and hand
+ *	over to split1 (-b), split3 (-n) or split2 (-l, the default).
+ *	-b, -l and -n are mutually exclusive; combining them prints the
+ *	usage message.
+ */
+int
+main(int argc, char *argv[])
+{
+	int ch;
+	char *ep, *p;
+	char const *base;
+	off_t bytecnt = 0;	/* Byte count to split on. */
+	off_t numlines = 0;	/* Line count to split on. */
+	off_t chunks = 0;	/* Number of chunks to split into. */
+
+	while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
+		switch (ch) {
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			/*
+			 * Undocumented kludge: split was originally designed
+			 * to take a number after a dash.
+			 */
+			if (numlines == 0) {
+				p = argv[optind - 1];
+				if (p[0] == '-' && p[1] == ch && !p[2])
+					p++;
+				else
+					p = argv[optind] + 1;
+				numlines = strtoull(p, &ep, 10);
+				if (numlines == 0 || *ep != '\0')
+					errx(1, "%s: illegal line count.", p);
+			}
+			break;
+		case 'b':		/* Byte count. */
+			/*
+			 * The count may be followed by a single 'k' or 'm'
+			 * multiplier; anything after the multiplier (e.g.
+			 * "10kfoo") is rejected rather than ignored.
+			 */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
+			    (*ep != '\0' && *ep != 'k' && *ep != 'm') ||
+			    (*ep != '\0' && ep[1] != '\0'))
+				errx(1, "%s: illegal byte count.", optarg);
+			if (*ep == 'k')
+				bytecnt *= 1024;
+			else if (*ep == 'm')
+				bytecnt *= 1024 * 1024;
+			break;
+		case 'l':		/* Line count. */
+			if (numlines != 0)
+				usage();
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (numlines = strtoull(optarg, &ep, 10)) == 0 ||
+			    *ep != '\0')
+				errx(1, "%s: illegal line count.", optarg);
+			break;
+		case 'a':		/* Suffix length. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
+			    *ep != '\0')
+				errx(1, "%s: illegal suffix length.", optarg);
+			break;
+		case 'n':		/* Chunks. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
+			    *ep != '\0')
+				errx(1, "%s: illegal number of chunks.", optarg);
+			break;
+		default:
+			usage();
+		}
+	argv += optind;
+	argc -= optind;
+
+	if (*argv != NULL) {
+		if (strcmp(*argv, "-") != 0 &&
+		    (ifd = open(*argv, O_RDONLY, 0)) < 0)
+			err(1, "%s", *argv);
+		++argv;
+	}
+
+
+	base = (*argv != NULL) ? *argv++ : "x";
+	/*
+	 * Bound prefix + suffix before sizing the buffer.  Without this a
+	 * huge -a value makes strlen(base) + sfxlen + 1 wrap around, the
+	 * allocation comes out too small and newfile() stores the suffix
+	 * outside it; newfile() also keeps its suffix index in an int.
+	 */
+	if (sfxlen >= MAXPATHLEN || strlen(base) >= MAXPATHLEN - sfxlen)
+		errx(1, "%s: output file name too long.", base);
+	if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
+		err(EXIT_FAILURE, NULL);
+	(void)strcpy(fname, base);		/* File name prefix. */
+
+	if (*argv != NULL)
+		usage();
+
+	if (numlines == 0)
+		numlines = DEFLINE;
+	else if (bytecnt || chunks)
+		usage();
+
+	if (bytecnt && chunks)
+		usage();
+
+	if (bytecnt)
+		split1(bytecnt, 0);
+	else if (chunks)
+		split3(chunks);
+	else
+		split2(numlines);
+
+	return 0;
+}
+
+/*
+ * split1 --
+ *	Split the input by bytes.
+ *
+ *	Reads ifd in MAXBSIZE pieces and writes bytecnt bytes to each output
+ *	file.  When maxcnt is non-zero no more than maxcnt files are opened;
+ *	once that many exist, all remaining input is written to the last
+ *	file opened.  Never returns: exits 0 at end of input and exits via
+ *	err() on a read or write failure.
+ */
+static void
+split1(off_t bytecnt, int maxcnt)
+{
+	off_t bcnt;		/* Bytes already written to the current file. */
+	ssize_t dist, len;
+	char *C;
+	char bfr[MAXBSIZE];
+	int nfiles;		/* Output files opened so far. */
+
+	nfiles = 0;
+
+	for (bcnt = 0;;)
+		switch (len = read(ifd, bfr, MAXBSIZE)) {
+		case 0:
+			exit(0);
+			/* NOTREACHED */
+		case -1:
+			err(1, "read");
+			/* NOTREACHED */
+		default:
+			if (!file_open) {
+				if (!maxcnt || (nfiles < maxcnt)) {
+					newfile();
+					nfiles++;
+					file_open = 1;
+				}
+			}
+			if (bcnt + len >= bytecnt) {
+				/* Top up the current file to bytecnt. */
+				/* LINTED: bytecnt - bcnt <= len */
+				dist = bytecnt - bcnt;
+				if (bigwrite(ofd, bfr, dist) != (size_t)dist)
+					err(1, "write");
+				len -= dist;
+				/* Emit every further full piece in bfr. */
+				for (C = bfr + dist; len >= bytecnt;
+				    /* LINTED: bytecnt <= len */
+				    len -= bytecnt, C += bytecnt) {
+					if (!maxcnt || (nfiles < maxcnt)) {
+						newfile();
+						nfiles++;
+					}
+					/* LINTED: as above */
+					if (bigwrite(ofd,
+					    C, bytecnt) != (size_t)bytecnt)
+						err(1, "write");
+				}
+				/*
+				 * A partial tail starts the next file now;
+				 * otherwise opening is put off until more
+				 * input actually arrives.
+				 */
+				if (len) {
+					if (!maxcnt || (nfiles < maxcnt)) {
+						newfile();
+						nfiles++;
+					}
+					/* LINTED: len >= 0 */
+					if (bigwrite(ofd, C, len) != (size_t)len)
+						err(1, "write");
+				} else
+					file_open = 0;
+				bcnt = len;
+			} else {
+				bcnt += len;
+				/* LINTED: len >= 0 */
+				if (bigwrite(ofd, bfr, len) != (size_t)len)
+					err(1, "write");
+			}
+		}
+}
+
+/*
+ * split2 --
+ *	Split the input by lines.
+ *
+ *	Reads ifd in MAXBSIZE pieces and starts a new output file after
+ *	every numlines newline characters.  Never returns: exits 0 at end
+ *	of input and exits via err() on a read or write failure.
+ */
+static void
+split2(off_t numlines)
+{
+	off_t lcnt;		/* Newlines seen in the current file. */
+	size_t bcnt;
+	ssize_t len;
+	char *Ce, *Cs;		/* Scan position / start of unwritten data. */
+	char bfr[MAXBSIZE];
+
+	for (lcnt = 0;;)
+		switch (len = read(ifd, bfr, MAXBSIZE)) {
+		case 0:
+			exit(0);
+			/* NOTREACHED */
+		case -1:
+			err(1, "read");
+			/* NOTREACHED */
+		default:
+			if (!file_open) {
+				newfile();
+				file_open = 1;
+			}
+			for (Cs = Ce = bfr; len--; Ce++)
+				if (*Ce == '\n' && ++lcnt == numlines) {
+					bcnt = Ce - Cs + 1;
+					if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
+						err(1, "write");
+					lcnt = 0;
+					Cs = Ce + 1;
+					/*
+					 * Only open the next file if this
+					 * buffer still holds data for it.
+					 */
+					if (len)
+						newfile();
+					else
+						file_open = 0;
+				}
+			/* Flush what is left after the last full file. */
+			if (Cs < Ce) {
+				bcnt = Ce - Cs;
+				if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
+					err(1, "write");
+			}
+		}
+}
+
+/*
+ * split3 --
+ *	Split the input into specified number of chunks
+ *
+ *	Uses the size fstat() reports for ifd, so at most st_size chunks can
+ *	be requested.  The work is done by split1 with a piece size of
+ *	st_size / chunks and a file limit of chunks.  Never returns.
+ */
+static void
+split3(off_t chunks)
+{
+	struct stat sb;
+
+	if (fstat(ifd, &sb) == -1) {
+		err(1, "stat");
+		/* NOTREACHED */
+	}
+
+	if (chunks > sb.st_size) {
+		/* Print the full off_t; an int cast truncates large sizes. */
+		errx(1, "can't split into more than %lld files",
+		    (long long)sb.st_size);
+		/* NOTREACHED */
+	}
+
+	split1(sb.st_size/chunks, chunks);
+}
+
+/*
+ * newfile --
+ *	Open a new output file.
+ *
+ *	The name is the prefix in fname followed by sfxlen letters 'a'-'z'
+ *	that encode a running file number in base 26.  The first call
+ *	locates the suffix position; later calls close the previous output
+ *	first.  Exits with "too many files." once the number no longer fits
+ *	in sfxlen letters.
+ */
+static void
+newfile(void)
+{
+	static int fnum;	/* Number of the next file to create. */
+	static char *fpnt;	/* Start of the suffix inside fname. */
+	int quot, i;
+
+	if (ofd == -1) {
+		fpnt = fname + strlen(fname);
+		fpnt[sfxlen] = '\0';
+	} else if (close(ofd) != 0)
+		err(1, "%s", fname);
+
+	/* Fill the suffix right to left, least significant letter last. */
+	quot = fnum;
+	for (i = sfxlen - 1; i >= 0; i--) {
+		fpnt[i] = quot % 26 + 'a';
+		quot = quot / 26;
+	}
+	if (quot > 0)
+		errx(1, "too many files.");
+	++fnum;
+	if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
+		err(1, "%s", fname);
+}
+
+/*
+ * bigwrite --
+ *	Write len bytes from buf to fd, calling write() again after every
+ *	short write.  Returns the number of bytes written, which is less
+ *	than len only when write() returned -1.
+ */
+static size_t
+bigwrite(int fd, const void *buf, size_t len)
+{
+	const char *ptr = buf;
+	size_t sofar = 0;
+	ssize_t w;
+
+	while (len != 0) {
+		if ((w = write(fd, ptr, len)) == -1)
+			return sofar;
+		len -= w;
+		ptr += w;
+		sofar += w;
+	}
+	return sofar;
+}
+
+
+/*
+ * usage --
+ *	Print the synopsis to standard error and exit with status 1.
+ */
+static void
+usage(void)
+{
+	(void)fprintf(stderr,
+"usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_length] "
+"[file [prefix]]\n", getprogname());
+	exit(1);
+}
-- 
cgit v1.2.3-70-g09d2