From 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe Mon Sep 17 00:00:00 2001 From: Kiyoshi Aman Date: Fri, 1 Feb 2019 22:55:37 +0000 Subject: initial population --- bin/pax/ar_io.c | 1725 +++++++++++++++++++++++++++++++++++++++ bin/pax/ar_subs.c | 1449 +++++++++++++++++++++++++++++++++ bin/pax/buf_subs.c | 1022 +++++++++++++++++++++++ bin/pax/cpio.1 | 307 +++++++ bin/pax/cpio.c | 1134 ++++++++++++++++++++++++++ bin/pax/cpio.h | 149 ++++ bin/pax/dumptar.c | 131 +++ bin/pax/extern.h | 326 ++++++++ bin/pax/file_subs.c | 1156 ++++++++++++++++++++++++++ bin/pax/ftree.c | 741 +++++++++++++++++ bin/pax/ftree.h | 48 ++ bin/pax/gen_subs.c | 437 ++++++++++ bin/pax/getoldopt.c | 92 +++ bin/pax/options.c | 2229 +++++++++++++++++++++++++++++++++++++++++++++++++++ bin/pax/options.h | 116 +++ bin/pax/pat_rep.c | 1139 ++++++++++++++++++++++++++ bin/pax/pat_rep.h | 51 ++ bin/pax/pax.1 | 1304 ++++++++++++++++++++++++++++++ bin/pax/pax.c | 492 ++++++++++++ bin/pax/pax.h | 283 +++++++ bin/pax/sel_subs.c | 617 ++++++++++++++ bin/pax/sel_subs.h | 69 ++ bin/pax/tables.c | 1379 +++++++++++++++++++++++++++++++ bin/pax/tables.h | 176 ++++ bin/pax/tar.1 | 372 +++++++++ bin/pax/tar.c | 1430 +++++++++++++++++++++++++++++++++ bin/pax/tar.h | 154 ++++ bin/pax/tty_subs.c | 200 +++++ 28 files changed, 18728 insertions(+) create mode 100644 bin/pax/ar_io.c create mode 100644 bin/pax/ar_subs.c create mode 100644 bin/pax/buf_subs.c create mode 100644 bin/pax/cpio.1 create mode 100644 bin/pax/cpio.c create mode 100644 bin/pax/cpio.h create mode 100644 bin/pax/dumptar.c create mode 100644 bin/pax/extern.h create mode 100644 bin/pax/file_subs.c create mode 100644 bin/pax/ftree.c create mode 100644 bin/pax/ftree.h create mode 100644 bin/pax/gen_subs.c create mode 100644 bin/pax/getoldopt.c create mode 100644 bin/pax/options.c create mode 100644 bin/pax/options.h create mode 100644 bin/pax/pat_rep.c create mode 100644 bin/pax/pat_rep.h create mode 100644 bin/pax/pax.1 create mode 100644 bin/pax/pax.c create 
mode 100644 bin/pax/pax.h create mode 100644 bin/pax/sel_subs.c create mode 100644 bin/pax/sel_subs.h create mode 100644 bin/pax/tables.c create mode 100644 bin/pax/tables.h create mode 100644 bin/pax/tar.1 create mode 100644 bin/pax/tar.c create mode 100644 bin/pax/tar.h create mode 100644 bin/pax/tty_subs.c (limited to 'bin/pax') diff --git a/bin/pax/ar_io.c b/bin/pax/ar_io.c new file mode 100644 index 0000000..d839fa0 --- /dev/null +++ b/bin/pax/ar_io.c @@ -0,0 +1,1725 @@ +/* $NetBSD: ar_io.c,v 1.58 2017/10/02 21:57:59 joerg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)ar_io.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: ar_io.c,v 1.58 2017/10/02 21:57:59 joerg Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#ifdef HAVE_SYS_MTIO_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef SUPPORT_RMT +#define __RMTLIB_PRIVATE +#include +#endif /* SUPPORT_RMT */ +#include "pax.h" +#include "options.h" +#include "extern.h" + +/* + * Routines which deal directly with the archive I/O device/file. + */ + +#define DMOD 0666 /* default mode of created archives */ +#define EXT_MODE O_RDONLY /* open mode for list/extract */ +#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ +#define APP_MODE O_RDWR /* mode for append */ +static char STDO[] = ""; /* pseudo name for stdout */ +static char STDN[] = ""; /* pseudo name for stdin */ +static char NONE[] = ""; /* pseudo name for none */ +static int arfd = -1; /* archive file descriptor */ +static int artyp = ISREG; /* archive type: file/FIFO/tape */ +static int arvol = 1; /* archive volume number */ +static int lstrval = -1; /* return value from last i/o */ +static int io_ok; /* i/o worked on volume after resync */ +static int did_io; /* did i/o ever occur on volume? 
*/ +static int done; /* set via tty termination */ +static struct stat arsb; /* stat of archive device at open */ +static int invld_rec; /* tape has out of spec record size */ +static int wr_trail = 1; /* trailer was rewritten in append */ +static int can_unlnk = 0; /* do we unlink null archives? */ +const char *arcname; /* printable name of archive */ +const char *gzip_program; /* name of gzip program */ +static pid_t zpid = -1; /* pid of child process */ +time_t starttime; /* time the run started */ +int force_one_volume; /* 1 if we ignore volume changes */ + +#ifdef HAVE_SYS_MTIO_H +static int get_phys(void); +#endif +extern sigset_t s_mask; +static void ar_start_gzip(int, const char *, int); +static const char *timefmt(char *, size_t, off_t, time_t, const char *); +static const char *sizefmt(char *, size_t, off_t); + +#ifdef SUPPORT_RMT +#ifdef SYS_NO_RESTART +static int rmtread_with_restart(int, void *, int); +static int rmtwrite_with_restart(int, void *, int); +#else +#define rmtread_with_restart(a, b, c) rmtread((a), (b), (c)) +#define rmtwrite_with_restart(a, b, c) rmtwrite((a), (b), (c)) +#endif +#endif /* SUPPORT_RMT */ + +/* + * ar_open() + * Opens the next archive volume. Determines the type of the device and + * sets up block sizes as required by the archive device and the format. + * Note: we may be called with name == NULL on the first open only. 
+ * Return: + * -1 on failure, 0 otherwise + */ + +int +ar_open(const char *name) +{ +#ifdef HAVE_SYS_MTIO_H + struct mtget mb; +#endif + + if (arfd != -1) + (void)close(arfd); + arfd = -1; + can_unlnk = did_io = io_ok = invld_rec = 0; + artyp = ISREG; + flcnt = 0; + +#ifdef SUPPORT_RMT + if (name && strchr(name, ':') != NULL && !forcelocal) { + artyp = ISRMT; + if ((arfd = rmtopen(name, O_RDWR, DMOD)) == -1) { + syswarn(0, errno, "Failed open on %s", name); + return -1; + } + if (!isrmt(arfd)) { + rmtclose(arfd); + tty_warn(0, "Not a remote file: %s", name); + return -1; + } + blksz = rdblksz = 8192; + lstrval = 1; + return 0; + } +#endif /* SUPPORT_RMT */ + + /* + * open based on overall operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + if (name == NULL) { + arfd = STDIN_FILENO; + arcname = STDN; + } else if ((arfd = open(name, EXT_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read on %s", name); + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 0); + break; + case ARCHIVE: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, AR_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to write on %s", name); + else + can_unlnk = 1; + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 1); + break; + case APPND: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, APP_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read/write on %s", + name); + break; + case COPY: + /* + * arfd not used in COPY mode + */ + arcname = NONE; + lstrval = 1; + return 0; + } + if (arfd < 0) + return -1; + + if (chdname != NULL) + if (dochdir(chdname) == -1) + return -1; + /* + * set up is based on device type + */ + if (fstat(arfd, &arsb) < 0) { + syswarn(0, errno, "Failed stat on %s", arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return -1; + } + if (S_ISDIR(arsb.st_mode)) { + tty_warn(0, "Cannot write an 
archive on top of a directory %s", + arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return -1; + } + + if (S_ISCHR(arsb.st_mode)) { +#ifdef HAVE_SYS_MTIO_H + artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE; +#else + tty_warn(1, "System does not have tape support"); + artyp = ISREG; +#endif + } else if (S_ISBLK(arsb.st_mode)) + artyp = ISBLK; + else if ((lseek(arfd, (off_t)0L, SEEK_CUR) == -1) && (errno == ESPIPE)) + artyp = ISPIPE; + else + artyp = ISREG; + + /* + * Special handling for empty files. + */ + if (artyp == ISREG && arsb.st_size == 0) { + switch (act) { + case LIST: + case EXTRACT: + return -1; + case APPND: + act = -ARCHIVE; + return -1; + case ARCHIVE: + break; + } + } + + /* + * make sure beyond any doubt that we can unlink only regular files + * we created + */ + if (artyp != ISREG) + can_unlnk = 0; + + /* + * if we are writing, we are done + */ + if (act == ARCHIVE) { + blksz = rdblksz = wrblksz; + lstrval = 1; + return 0; + } + + /* + * set default blksz on read. APPNDs writes rdblksz on the last volume + * On all new archive volumes, we shift to wrblksz (if the user + * specified one, otherwize we will continue to use rdblksz). We + * must set blocksize based on what kind of device the archive is + * stored. + */ + switch(artyp) { + case ISTAPE: + /* + * Tape drives come in at least two flavors. Those that support + * variable sized records and those that have fixed sized + * records. They must be treated differently. For tape drives + * that support variable sized records, we must make large + * reads to make sure we get the entire record, otherwise we + * will just get the first part of the record (up to size we + * asked). Tapes with fixed sized records may or may not return + * multiple records in a single read. We really do not care + * what the physical record size is UNLESS we are going to + * append. (We will need the physical block size to rewrite + * the trailer). 
Only when we are appending do we go to the + * effort to figure out the true PHYSICAL record size. + */ + blksz = rdblksz = MAXBLK; + break; + case ISPIPE: + case ISBLK: + case ISCHR: + /* + * Blocksize is not a major issue with these devices (but must + * be kept a multiple of 512). If the user specified a write + * block size, we use that to read. Under append, we must + * always keep blksz == rdblksz. Otherwise we go ahead and use + * the device optimal blocksize as (and if) returned by stat + * and if it is within pax specs. + */ + if ((act == APPND) && wrblksz) { + blksz = rdblksz = wrblksz; + break; + } + + if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && + ((arsb.st_blksize % BLKMULT) == 0)) + rdblksz = arsb.st_blksize; + else + rdblksz = DEVBLK; + /* + * For performance go for large reads when we can without harm + */ + if ((act == APPND) || (artyp == ISCHR)) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + case ISREG: + /* + * if the user specified wrblksz works, use it. Under appends + * we must always keep blksz == rdblksz + */ + if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ + blksz = rdblksz = wrblksz; + break; + } + /* + * See if we can find the blocking factor from the file size + */ + for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) + if ((arsb.st_size % rdblksz) == 0) + break; + /* + * When we cannot find a match, we may have a flawed archive. + */ + if (rdblksz <= 0) + rdblksz = FILEBLK; + /* + * for performance go for large reads when we can + */ + if (act == APPND) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + default: + /* + * should never happen, worst case, slow... + */ + blksz = rdblksz = BLKMULT; + break; + } + lstrval = 1; + return 0; +} + +/* + * ar_close() + * closes archive device, increments volume number, and prints i/o summary + */ +void +ar_close(void) +{ + int status; + + if (arfd < 0) { + did_io = io_ok = flcnt = 0; + return; + } + + + /* + * Close archive file. 
This may take a LONG while on tapes (we may be + * forced to wait for the rewind to complete) so tell the user what is + * going on (this avoids the user hitting control-c thinking pax is + * broken). + */ + if ((vflag || Vflag) && (artyp == ISTAPE)) { + if (vfpart) + (void)putc('\n', listf); + (void)fprintf(listf, + "%s: Waiting for tape drive close to complete...", + argv0); + (void)fflush(listf); + } + + /* + * if nothing was written to the archive (and we created it), we remove + * it + */ + if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && + (arsb.st_size == 0)) { + (void)unlink(arcname); + can_unlnk = 0; + } + + /* + * for a quick extract/list, pax frequently exits before the child + * process is done + */ + if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) + kill(zpid, SIGINT); + +#ifdef SUPPORT_RMT + if (artyp == ISRMT) + (void)rmtclose(arfd); + else +#endif /* SUPPORT_RMT */ + (void)close(arfd); + + /* Do not exit before child to ensure data integrity */ + if (zpid > 0) + waitpid(zpid, &status, 0); + + if ((vflag || Vflag) && (artyp == ISTAPE)) { + (void)fputs("done.\n", listf); + vfpart = 0; + (void)fflush(listf); + } + arfd = -1; + + if (!io_ok && !did_io) { + flcnt = 0; + return; + } + did_io = io_ok = 0; + + /* + * The volume number is only increased when the last device has data + * and we have already determined the archive format. + */ + if (frmt != NULL) + ++arvol; + + if (!vflag && !Vflag) { + flcnt = 0; + return; + } + + /* + * Print out a summary of I/O for this archive volume. + */ + if (vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + + /* mimic cpio's block count first */ + if (frmt && strcmp(NM_CPIO, argv0) == 0) { + (void)fprintf(listf, OFFT_F " blocks\n", + (rdcnt ? rdcnt : wrcnt) / 5120); + } + + ar_summary(0); + + (void)fflush(listf); + flcnt = 0; +} + +/* + * ar_drain() + * drain any archive format independent padding from an archive read + * from a socket or a pipe. 
This is to prevent the process on the + * other side of the pipe from getting a SIGPIPE (pax will stop + * reading an archive once a format dependent trailer is detected). + */ +void +ar_drain(void) +{ + int res; + char drbuf[MAXBLK]; + + /* + * we only drain from a pipe/socket. Other devices can be closed + * without reading up to end of file. We sure hope that pipe is closed + * on the other side so we will get an EOF. + */ + if ((artyp != ISPIPE) || (lstrval <= 0)) + return; + + /* + * keep reading until pipe is drained + */ +#ifdef SUPPORT_RMT + if (artyp == ISRMT) { + while ((res = rmtread_with_restart(arfd, + drbuf, sizeof(drbuf))) > 0) + continue; + } else { +#endif /* SUPPORT_RMT */ + while ((res = read_with_restart(arfd, + drbuf, sizeof(drbuf))) > 0) + continue; +#ifdef SUPPORT_RMT + } +#endif /* SUPPORT_RMT */ + lstrval = res; +} + +/* + * ar_set_wr() + * Set up device right before switching from read to write in an append. + * device dependent code (if required) to do this should be added here. + * For all archive devices we are already positioned at the place we want + * to start writing when this routine is called. + * Return: + * 0 if all ready to write, -1 otherwise + */ + +int +ar_set_wr(void) +{ + off_t cpos; + + /* + * we must make sure the trailer is rewritten on append, ar_next() + * will stop us if the archive containing the trailer was not written + */ + wr_trail = 0; + + /* + * Add any device dependent code as required here + */ + if (artyp != ISREG) + return 0; + /* + * Ok we have an archive in a regular file. If we were rewriting a + * file, we must get rid of all the stuff after the current offset + * (it was not written by pax). + */ + if (((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) || + (ftruncate(arfd, cpos) < 0)) { + syswarn(1, errno, "Unable to truncate archive file"); + return -1; + } + return 0; +} + +/* + * ar_app_ok() + * check if the last volume in the archive allows appends. 
We cannot check + * this until we are ready to write since there is no spec that says all + * volumes in a single archive have to be of the same type... + * Return: + * 0 if we can append, -1 otherwise. + */ + +int +ar_app_ok(void) +{ + if (artyp == ISPIPE) { + tty_warn(1, + "Cannot append to an archive obtained from a pipe."); + return -1; + } + + if (!invld_rec) + return 0; + tty_warn(1, + "Cannot append, device record size %d does not support %s spec", + rdblksz, argv0); + return -1; +} + +#ifdef SYS_NO_RESTART +/* + * read_with_restart() + * Equivalent to read() but does retry on signals. + * This function is not needed on 4.2BSD and later. + * Return: + * Number of bytes written. -1 indicates an error. + */ + +int +read_with_restart(int fd, void *buf, int bsz) +{ + int r; + + while (((r = read(fd, buf, bsz)) < 0) && errno == EINTR) + continue; + + return r; +} + +/* + * rmtread_with_restart() + * Equivalent to rmtread() but does retry on signals. + * This function is not needed on 4.2BSD and later. + * Return: + * Number of bytes written. -1 indicates an error. + */ +static int +rmtread_with_restart(int fd, void *buf, int bsz) +{ + int r; + + while (((r = rmtread(fd, buf, bsz)) < 0) && errno == EINTR) + continue; + + return r; +} +#endif + +/* + * xread() + * Equivalent to read() but does retry on partial read, which may occur + * on signals. + * Return: + * Number of bytes read. 0 for end of file, -1 for an error. + */ + +int +xread(int fd, void *buf, int bsz) +{ + char *b = buf; + int nread = 0; + int r; + + do { +#ifdef SUPPORT_RMT + if ((r = rmtread_with_restart(fd, b, bsz)) <= 0) + break; +#else + if ((r = read_with_restart(fd, b, bsz)) <= 0) + break; +#endif /* SUPPORT_RMT */ + b += r; + bsz -= r; + nread += r; + } while (bsz > 0); + + return nread ? nread : r; +} + +#ifdef SYS_NO_RESTART +/* + * write_with_restart() + * Equivalent to write() but does retry on signals. + * This function is not needed on 4.2BSD and later. 
+ * Return: + * Number of bytes written. -1 indicates an error. + */ + +int +write_with_restart(int fd, void *buf, int bsz) +{ + int r; + + while (((r = write(fd, buf, bsz)) < 0) && errno == EINTR) + ; + + return r; +} + +/* + * rmtwrite_with_restart() + * Equivalent to write() but does retry on signals. + * This function is not needed on 4.2BSD and later. + * Return: + * Number of bytes written. -1 indicates an error. + */ + +static int +rmtwrite_with_restart(int fd, void *buf, int bsz) +{ + int r; + + while (((r = rmtwrite(fd, buf, bsz)) < 0) && errno == EINTR) + ; + + return r; +} +#endif + +/* + * xwrite() + * Equivalent to write() but does retry on partial write, which may occur + * on signals. + * Return: + * Number of bytes written. -1 indicates an error. + */ + +int +xwrite(int fd, void *buf, int bsz) +{ + char *b = buf; + int written = 0; + int r; + + do { +#ifdef SUPPORT_RMT + if ((r = rmtwrite_with_restart(fd, b, bsz)) <= 0) + break; +#else + if ((r = write_with_restart(fd, b, bsz)) <= 0) + break; +#endif /* SUPPORT_RMT */ + b += r; + bsz -= r; + written += r; + } while (bsz > 0); + + return written ? written : r; +} + +/* + * ar_read() + * read up to a specified number of bytes from the archive into the + * supplied buffer. When dealing with tapes we may not always be able to + * read what we want. + * Return: + * Number of bytes in buffer. 0 for end of file, -1 for a read error. 
+ */ + +int +ar_read(char *buf, int cnt) +{ + int res = 0; + + /* + * if last i/o was in error, no more reads until reset or new volume + */ + if (lstrval <= 0) + return lstrval; + + /* + * how we read must be based on device type + */ + switch (artyp) { +#ifdef SUPPORT_RMT + case ISRMT: + if ((res = rmtread_with_restart(arfd, buf, cnt)) > 0) { + io_ok = 1; + return res; + } + break; +#endif /* SUPPORT_RMT */ + case ISTAPE: + if ((res = read_with_restart(arfd, buf, cnt)) > 0) { + /* + * CAUTION: tape systems may not always return the same + * sized records so we leave blksz == MAXBLK. The + * physical record size that a tape drive supports is + * very hard to determine in a uniform and portable + * manner. + */ + io_ok = 1; + if (res != rdblksz) { + /* + * Record size changed. If this happens on + * any record after the first, we probably have + * a tape drive which has a fixed record size + * (we are getting multiple records in a single + * read). Watch out for record blocking that + * violates pax spec (must be a multiple of + * BLKMULT). + */ + rdblksz = res; + if (rdblksz % BLKMULT) + invld_rec = 1; + } + return res; + } + break; + case ISREG: + case ISBLK: + case ISCHR: + case ISPIPE: + default: + /* + * Files are so easy to deal with. These other things cannot + * be trusted at all. So when we are dealing with character + * devices and pipes we just take what they have ready for us + * and return. Trying to do anything else with them runs the + * risk of failure. + */ + if ((res = read_with_restart(arfd, buf, cnt)) > 0) { + io_ok = 1; + return res; + } + break; + } + + /* + * We are in trouble at this point, something is broken... + */ + lstrval = res; + if (res < 0) + syswarn(1, errno, "Failed read on archive volume %d", arvol); + else + tty_warn(0, "End of archive volume %d reached", arvol); + return res; +} + +/* + * ar_write() + * Write a specified number of bytes in supplied buffer to the archive + * device so it appears as a single "block". 
Deals with errors and tries + * to recover when faced with short writes. + * Return: + * Number of bytes written. 0 indicates end of volume reached and with no + * flaws (as best that can be detected). A -1 indicates an unrecoverable + * error in the archive occurred. + */ + +int +ar_write(char *buf, int bsz) +{ + int res; + off_t cpos; + + /* + * do not allow pax to create a "bad" archive. Once a write fails on + * an archive volume prevent further writes to it. + */ + if (lstrval <= 0) + return lstrval; + + if ((res = xwrite(arfd, buf, bsz)) == bsz) { + wr_trail = 1; + io_ok = 1; + return bsz; + } + /* + * write broke, see what we can do with it. We try to send any partial + * writes that may violate pax spec to the next archive volume. + */ + if (res < 0) + lstrval = res; + else + lstrval = 0; + + switch (artyp) { + case ISREG: + if ((res > 0) && (res % BLKMULT)) { + /* + * try to fix up partial writes which are not BLKMULT + * in size by forcing the runt record to next archive + * volume + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + cpos -= (off_t)res; + if (ftruncate(arfd, cpos) < 0) + break; + res = lstrval = 0; + break; + } + if (res >= 0) + break; + /* + * if file is out of space, handle it like a return of 0 + */ + if ((errno == ENOSPC) || (errno == EFBIG)) + res = lstrval = 0; +#ifdef EDQUOT + if (errno == EDQUOT) + res = lstrval = 0; +#endif + break; + case ISTAPE: + case ISCHR: + case ISBLK: +#ifdef SUPPORT_RMT + case ISRMT: +#endif /* SUPPORT_RMT */ + if (res >= 0) + break; + if (errno == EACCES) { + tty_warn(0, + "Write failed, archive is write protected."); + res = lstrval = 0; + return 0; + } + /* + * see if we reached the end of media, if so force a change to + * the next volume + */ + if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) + res = lstrval = 0; + break; + case ISPIPE: + default: + /* + * we cannot fix errors to these devices + */ + break; + } + + /* + * Better tell the user the bad news... 
+ * if this is a block aligned archive format, we may have a bad archive + * if the format wants the header to start at a BLKMULT boundary. While + * we can deal with the mis-aligned data, it violates spec and other + * archive readers will likely fail. if the format is not block + * aligned, the user may be lucky (and the archive is ok). + */ + if (res >= 0) { + if (res > 0) + wr_trail = 1; + io_ok = 1; + } + + /* + * If we were trying to rewrite the trailer and it didn't work, we + * must quit right away. + */ + if (!wr_trail && (res <= 0)) { + tty_warn(1, + "Unable to append, trailer re-write failed. Quitting."); + return res; + } + + if (res == 0) + tty_warn(0, "End of archive volume %d reached", arvol); + else if (res < 0) + syswarn(1, errno, "Failed write to archive volume: %d", arvol); + else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) + tty_warn(0, + "WARNING: partial archive write. Archive MAY BE FLAWED"); + else + tty_warn(1,"WARNING: partial archive write. Archive IS FLAWED"); + return res; +} + +/* + * ar_rdsync() + * Try to move past a bad spot on a flawed archive as needed to continue + * I/O. Clears error flags to allow I/O to continue. + * Return: + * 0 when ok to try i/o again, -1 otherwise. + */ + +int +ar_rdsync(void) +{ + long fsbz; + off_t cpos; + off_t mpos; +#ifdef HAVE_SYS_MTIO_H + struct mtop mb; +#endif + + /* + * Fail resync attempts at user request (done) or if this is going to be + * an update/append to a existing archive. 
if last i/o hit media end, + * we need to go to the next volume not try a resync + */ + if ((done > 0) || (lstrval == 0)) + return -1; + + if ((act == APPND) || (act == ARCHIVE)) { + tty_warn(1, "Cannot allow updates to an archive with flaws."); + return -1; + } + if (io_ok) + did_io = 1; + + switch(artyp) { +#ifdef SUPPORT_RMT + case ISRMT: +#endif /* SUPPORT_RMT */ + case ISTAPE: +#ifdef HAVE_SYS_MTIO_H + /* + * if the last i/o was a successful data transfer, we assume + * the fault is just a bad record on the tape that we are now + * past. If we did not get any data since the last resync try + * to move the tape forward one PHYSICAL record past any + * damaged tape section. Some tape drives are stubborn and need + * to be pushed. + */ + if (io_ok) { + io_ok = 0; + lstrval = 1; + break; + } + mb.mt_op = MTFSR; + mb.mt_count = 1; +#ifdef SUPPORT_RMT + if (artyp == ISRMT) { + if (rmtioctl(arfd, MTIOCTOP, &mb) < 0) + break; + } else { +#endif /* SUPPORT_RMT */ + if (ioctl(arfd, MTIOCTOP, &mb) < 0) + break; +#ifdef SUPPORT_RMT + } +#endif /* SUPPORT_RMT */ + lstrval = 1; +#else + tty_warn(1, "System does not have tape support"); +#endif + break; + case ISREG: + case ISCHR: + case ISBLK: + /* + * try to step over the bad part of the device. + */ + io_ok = 0; + if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) + fsbz = BLKMULT; + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + mpos = fsbz - (cpos % (off_t)fsbz); + if (lseek(arfd, mpos, SEEK_CUR) < 0) + break; + lstrval = 1; + break; + case ISPIPE: + default: + /* + * cannot recover on these archive device types + */ + io_ok = 0; + break; + } + if (lstrval <= 0) { + tty_warn(1, "Unable to recover from an archive read failure."); + return -1; + } + tty_warn(0, "Attempting to recover from an archive read failure."); + return 0; +} + +/* + * ar_fow() + * Move the I/O position within the archive forward the specified number of + * bytes as supported by the device. 
If we cannot move the requested + * number of bytes, return the actual number of bytes moved in skipped. + * Return: + * 0 if moved the requested distance, -1 on complete failure, 1 on + * partial move (the amount moved is in skipped) + */ + +int +ar_fow(off_t sksz, off_t *skipped) +{ + off_t cpos; + off_t mpos; + + *skipped = 0; + if (sksz <= 0) + return 0; + + /* + * we cannot move forward at EOF or error + */ + if (lstrval <= 0) + return lstrval; + + /* + * Safer to read forward on devices where it is hard to find the end of + * the media without reading to it. With tapes we cannot be sure of the + * number of physical blocks to skip (we do not know physical block + * size at this point), so we must only read forward on tapes! + */ + if (artyp == ISTAPE || artyp == ISPIPE +#ifdef SUPPORT_RMT + || artyp == ISRMT +#endif /* SUPPORT_RMT */ + ) + return 0; + + /* + * figure out where we are in the archive + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) >= 0) { + /* + * we can be asked to move farther than there are bytes in this + * volume, if so, just go to file end and let normal buf_fill() + * deal with the end of file (it will go to next volume by + * itself) + */ + mpos = cpos + sksz; + if (artyp == ISREG && mpos > arsb.st_size) + mpos = arsb.st_size; + if ((mpos = lseek(arfd, mpos, SEEK_SET)) >= 0) { + *skipped = mpos - cpos; + return 0; + } + } else { + if (artyp != ISREG) + return 0; /* non-seekable device */ + } + syswarn(1, errno, "Forward positioning operation on archive failed"); + lstrval = -1; + return -1; +} + +/* + * ar_rev() + * move the i/o position within the archive backwards the specified byte + * count as supported by the device. With tapes drives we RESET rdblksz to + * the PHYSICAL blocksize. + * NOTE: We should only be called to move backwards so we can rewrite the + * last records (the trailer) of an archive (APPEND). 
+ * Return: + * 0 if moved the requested distance, -1 on complete failure + */ + +int +ar_rev(off_t sksz) +{ + off_t cpos; +#ifdef HAVE_SYS_MTIO_H + int phyblk; + struct mtop mb; +#endif + + /* + * make sure we do not have try to reverse on a flawed archive + */ + if (lstrval < 0) + return lstrval; + + switch(artyp) { + case ISPIPE: + if (sksz <= 0) + break; + /* + * cannot go backwards on these critters + */ + tty_warn(1, "Reverse positioning on pipes is not supported."); + lstrval = -1; + return -1; + case ISREG: + case ISBLK: + case ISCHR: + default: + if (sksz <= 0) + break; + + /* + * For things other than files, backwards movement has a very + * high probability of failure as we really do not know the + * true attributes of the device we are talking to (the device + * may not even have the ability to lseek() in any direction). + * First we figure out where we are in the archive. + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) { + syswarn(1, errno, + "Unable to obtain current archive byte offset"); + lstrval = -1; + return -1; + } + + /* + * we may try to go backwards past the start when the archive + * is only a single record. If this happens and we are on a + * multi-volume archive, we need to go to the end of the + * previous volume and continue our movement backwards from + * there. + */ + if ((cpos -= sksz) < (off_t)0L) { + if (arvol > 1) { + /* + * this should never happen + */ + tty_warn(1, + "Reverse position on previous volume."); + lstrval = -1; + return -1; + } + cpos = (off_t)0L; + } + if (lseek(arfd, cpos, SEEK_SET) < 0) { + syswarn(1, errno, "Unable to seek archive backwards"); + lstrval = -1; + return -1; + } + break; + case ISTAPE: +#ifdef SUPPORT_RMT + case ISRMT: +#endif /* SUPPORT_RMT */ +#ifdef HAVE_SYS_MTIO_H + /* + * Calculate and move the proper number of PHYSICAL tape + * blocks. If the sksz is not an even multiple of the physical + * tape size, we cannot do the move (this should never happen). 
+ * (We also cannot handle trailers spread over two vols). + * get_phys() also makes sure we are in front of the filemark. + */ + if ((phyblk = get_phys()) <= 0) { + lstrval = -1; + return -1; + } + + /* + * make sure future tape reads only go by physical tape block + * size (set rdblksz to the real size). + */ + rdblksz = phyblk; + + /* + * if no movement is required, just return (we must be after + * get_phys() so the physical blocksize is properly set) + */ + if (sksz <= 0) + break; + + /* + * ok we have to move. Make sure the tape drive can do it. + */ + if (sksz % phyblk) { + tty_warn(1, + "Tape drive unable to backspace requested amount"); + lstrval = -1; + return -1; + } + + /* + * move backwards the requested number of bytes + */ + mb.mt_op = MTBSR; + mb.mt_count = sksz/phyblk; + if ( +#ifdef SUPPORT_RMT + rmtioctl(arfd, MTIOCTOP, &mb) +#else + ioctl(arfd, MTIOCTOP, &mb) +#endif /* SUPPORT_RMT */ + < 0) { + syswarn(1, errno, "Unable to backspace tape %ld blocks.", + (long) mb.mt_count); + lstrval = -1; + return -1; + } +#else + tty_warn(1, "System does not have tape support"); +#endif + break; + } + lstrval = 1; + return 0; +} + +#ifdef HAVE_SYS_MTIO_H +/* + * get_phys() + * Determine the physical block size on a tape drive. We need the physical + * block size so we know how many bytes we skip over when we move with + * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when + * return. + * This is one really SLOW routine... + * Return: + * physical block size if ok (ok > 0), -1 otherwise + */ + +static int +get_phys(void) +{ + int padsz = 0; + int res; + int phyblk; + struct mtop mb; + char scbuf[MAXBLK]; + + /* + * move to the file mark, and then back up one record and read it. + * this should tell us the physical record size the tape is using. 
+ */ + if (lstrval == 1) { + /* + * we know we are at file mark when we get back a 0 from + * read() + */ +#ifdef SUPPORT_RMT + while ((res = rmtread_with_restart(arfd, + scbuf, sizeof(scbuf))) > 0) +#else + while ((res = read_with_restart(arfd, + scbuf, sizeof(scbuf))) > 0) +#endif /* SUPPORT_RMT */ + padsz += res; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return -1; + } + } + + /* + * move backwards over the file mark so we are at the end of the + * last record. + */ + mb.mt_op = MTBSF; + mb.mt_count = 1; + if ( +#ifdef SUPPORT_RMT + rmtioctl(arfd, MTIOCTOP, &mb) +#else + ioctl(arfd, MTIOCTOP, &mb) +#endif /* SUPPORT_RMT */ + < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return -1; + } + + /* + * move backwards so we are in front of the last record and read it to + * get physical tape blocksize. + */ + mb.mt_op = MTBSR; + mb.mt_count = 1; + if ( +#ifdef SUPPORT_RMT + rmtioctl(arfd, MTIOCTOP, &mb) +#else + ioctl(arfd, MTIOCTOP, &mb) +#endif /* SUPPORT_RMT */ + < 0) { + syswarn(1, errno, "Unable to backspace over last tape block."); + return -1; + } + if ((phyblk = +#ifdef SUPPORT_RMT + rmtread_with_restart(arfd, scbuf, sizeof(scbuf)) +#else + read_with_restart(arfd, scbuf, sizeof(scbuf)) +#endif /* SUPPORT_RMT */ + ) <= 0) { + syswarn(1, errno, "Cannot determine archive tape blocksize."); + return -1; + } + + /* + * read forward to the file mark, then back up in front of the filemark + * (this is a bit paranoid, but should be safe to do). 
+ */ + while ((res = +#ifdef SUPPORT_RMT + rmtread_with_restart(arfd, scbuf, sizeof(scbuf)) +#else + read_with_restart(arfd, scbuf, sizeof(scbuf)) +#endif /* SUPPORT_RMT */ + ) > 0) + ; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return -1; + } + mb.mt_op = MTBSF; + mb.mt_count = 1; + if ( +#ifdef SUPPORT_RMT + rmtioctl(arfd, MTIOCTOP, &mb) +#else + ioctl(arfd, MTIOCTOP, &mb) +#endif /* SUPPORT_RMT */ + < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return -1; + } + + /* + * set lstrval so we know that the filemark has not been seen + */ + lstrval = 1; + + /* + * return if there was no padding + */ + if (padsz == 0) + return phyblk; + + /* + * make sure we can move backwards over the padding. (this should + * never fail). + */ + if (padsz % phyblk) { + tty_warn(1, "Tape drive unable to backspace requested amount"); + return -1; + } + + /* + * move backwards over the padding so the head is where it was when + * we were first called (if required). + */ + mb.mt_op = MTBSR; + mb.mt_count = padsz/phyblk; + if ( +#ifdef SUPPORT_RMT + rmtioctl(arfd, MTIOCTOP, &mb) +#else + ioctl(arfd, MTIOCTOP, &mb) +#endif /* SUPPORT_RMT */ + < 0) { + syswarn(1, errno, + "Unable to backspace tape over %ld pad blocks", + (long)mb.mt_count); + return -1; + } + return phyblk; +} +#endif + +/* + * ar_next() + * prompts the user for the next volume in this archive. For some devices + * we may allow the media to be changed. Otherwise a new archive is + * prompted for. By pax spec, if there is no controlling tty or an eof is + * read on tty input, we must quit pax. + * Return: + * 0 when ready to continue, -1 when all done + */ + +int +ar_next(void) +{ + char buf[PAXPATHLEN+2]; + static char *arcfree = NULL; + sigset_t o_mask; + + /* + * WE MUST CLOSE THE DEVICE. 
A lot of devices must see last close, (so + * things like writing EOF etc will be done) (Watch out ar_close() can + * also be called via a signal handler, so we must prevent a race. + */ + if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) < 0) + syswarn(0, errno, "Unable to set signal mask"); + ar_close(); + if (sigprocmask(SIG_SETMASK, &o_mask, NULL) < 0) + syswarn(0, errno, "Unable to restore signal mask"); + + if (done || !wr_trail || force_one_volume) + return -1; + + if (!is_gnutar) + tty_prnt("\nATTENTION! %s archive volume change required.\n", + argv0); + + /* + * if i/o is on stdin or stdout, we cannot reopen it (we do not know + * the name), the user will be forced to type it in. + */ + if (strcmp(arcname, STDO) && strcmp(arcname, STDN) && (artyp != ISREG) + && (artyp != ISPIPE)) { + if (artyp == ISTAPE +#ifdef SUPPORT_RMT + || artyp == ISRMT +#endif /* SUPPORT_RMT */ + ) { + tty_prnt("%s ready for archive tape volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT TAPE on the tape drive"); + } else { + tty_prnt("%s ready for archive volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); + } + + if ((act == ARCHIVE) || (act == APPND)) + tty_prnt(" and make sure it is WRITE ENABLED.\n"); + else + tty_prnt("\n"); + + for(;;) { + tty_prnt("Type \"y\" to continue, \".\" to quit %s,", + argv0); + tty_prnt(" or \"s\" to switch to new device.\nIf you"); + tty_prnt(" cannot change storage media, type \"s\"\n"); + tty_prnt("Is the device ready and online? 
> "); + + if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return -1; + } + + if ((buf[0] == '\0') || (buf[1] != '\0')) { + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + + switch (buf[0]) { + case 'y': + case 'Y': + /* + * we are to continue with the same device + */ + if (ar_open(arcname) >= 0) + return 0; + tty_prnt("Cannot re-open %s, try again\n", + arcname); + continue; + case 's': + case 'S': + /* + * user wants to open a different device + */ + tty_prnt("Switching to a different archive\n"); + break; + default: + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + break; + } + } else { + if (is_gnutar) { + tty_warn(1, "Unexpected EOF on archive file"); + return -1; + } + tty_prnt("Ready for archive volume: %d\n", arvol); + } + + /* + * have to go to a different archive + */ + for (;;) { + tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); + tty_prnt("Archive name > "); + + if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return -1; + } + if (buf[0] == '\0') { + tty_prnt("Empty file name, try again\n"); + continue; + } + if (!strcmp(buf, "..")) { + tty_prnt("Illegal file name: .. try again\n"); + continue; + } + if (strlen(buf) > PAXPATHLEN) { + tty_prnt("File name too long, try again\n"); + continue; + } + + /* + * try to open new archive + */ + if (ar_open(buf) >= 0) { + if (arcfree) { + (void)free(arcfree); + arcfree = NULL; + } + if ((arcfree = strdup(buf)) == NULL) { + done = 1; + lstrval = -1; + tty_warn(0, "Cannot save archive name."); + return -1; + } + arcname = arcfree; + break; + } + tty_prnt("Cannot open %s, try again\n", buf); + continue; + } + return 0; +} + +/* + * ar_start_gzip() + * starts the compression/decompression process as a child, using magic + * to keep the fd the same in the calling function (parent). 
possible
+ *	programs are GZIP_CMD, BZIP2_CMD, and COMPRESS_CMD.
+ *
+ *	fd is the archive descriptor; in the parent it is replaced by one end
+ *	of a pipe to the child.  gzp is the program to run.  wr selects the
+ *	direction: nonzero runs "gzp -c" reading the pipe and writing to the
+ *	old fd, zero runs "gzp -dc" reading the old fd and writing the pipe.
+ *	Failure to create the pipe, fork, redirect a descriptor or exec the
+ *	program is fatal (err(3)).
+ */
+void
+ar_start_gzip(int fd, const char *gzp, int wr)
+{
+	int fds[2];
+	const char *gzip_flags;
+
+	if (pipe(fds) < 0)
+		err(1, "could not pipe");
+	zpid = fork();
+	if (zpid < 0)
+		err(1, "could not fork");
+
+	/* parent */
+	if (zpid) {
+		/*
+		 * put our end of the pipe where the archive fd was.  if this
+		 * fails the caller would keep using the raw archive while the
+		 * child sits on an empty pipe, so do not continue.
+		 */
+		if (dup2(wr ? fds[1] : fds[0], fd) < 0)
+			err(1, "could not dup2");
+		close(fds[0]);
+		close(fds[1]);
+	} else {
+		if (wr) {
+			if (dup2(fds[0], STDIN_FILENO) < 0 ||
+			    dup2(fd, STDOUT_FILENO) < 0)
+				err(1, "could not dup2");
+			gzip_flags = "-c";
+		} else {
+			if (dup2(fds[1], STDOUT_FILENO) < 0 ||
+			    dup2(fd, STDIN_FILENO) < 0)
+				err(1, "could not dup2");
+			gzip_flags = "-dc";
+		}
+		close(fds[0]);
+		close(fds[1]);
+		/* the sentinel of a variadic exec must be a char pointer */
+		if (execlp(gzp, gzp, gzip_flags, (char *)NULL) < 0)
+			err(1, "could not exec");
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * timefmt()
+ *	format "<tm> secs (<sz/tm> <unitstr>/sec)" into buf (at most size
+ *	bytes) and return buf.  tm is used as a divisor and must not be 0;
+ *	ar_summary() never passes a value below 1.
+ */
+static const char *
+timefmt(char *buf, size_t size, off_t sz, time_t tm, const char *unitstr)
+{
+	(void)snprintf(buf, size, "%lu secs (" OFFT_F " %s/sec)",
+	    (unsigned long)tm, (OFFT_T)(sz / tm), unitstr);
+	return buf;
+}
+
+/*
+ * sizefmt()
+ *	format "<sz> bytes" into buf (at most size bytes) and return buf.
+ */
+static const char *
+sizefmt(char *buf, size_t size, off_t sz)
+{
+	(void)snprintf(buf, size, OFFT_F " bytes", (OFFT_T)sz);
+	return buf;
+}
+
+/*
+ * ar_summary()
+ *	report how much was read/written (or, for COPY, how many files were
+ *	handled) and how long it took.  n == 0 is the normal call and prints
+ *	through stdio (stdout when listing, stderr otherwise); n != 0 means we
+ *	were called from a signal, in which case the text is built with
+ *	snprintf(3) and sent with write(2) to stderr, preceded by the name of
+ *	the member currently being worked on.
+ */
+void
+ar_summary(int n)
+{
+	time_t secs;
+	char buf[BUFSIZ];
+	char tbuf[MAXPATHLEN/4];	/* XXX silly size! */
+	char s1buf[MAXPATHLEN/8];	/* XXX very silly size! */
+	char s2buf[MAXPATHLEN/8];	/* XXX very silly size! */
+	FILE *outf;
+
+	if (act == LIST)
+		outf = stdout;
+	else
+		outf = stderr;
+
+	/*
+	 * If we are called from a signal (n != 0), use snprintf(3) so that we
+	 * don't reenter stdio(3).
+	 */
+	(void)time(&secs);
+	/*
+	 * secs is the divisor used by timefmt(); force it to at least 1 so a
+	 * zero elapsed time (or a clock that stepped backwards) cannot divide
+	 * by zero or print a negative rate.
+	 */
+	if ((secs -= starttime) <= 0)
+		secs = 1;
+
+	/*
+	 * If we have not determined the format yet, we just say how many bytes
+	 * we have skipped over looking for a header to id. there is no way we
+	 * could have written anything yet.
+	 */
+	if (frmt == NULL && act != COPY) {
+		snprintf(buf, sizeof(buf),
+		    "unknown format, %s skipped in %s\n",
+		    sizefmt(s1buf, sizeof(s1buf), rdcnt),
+		    timefmt(tbuf, sizeof(tbuf), rdcnt, secs, "bytes"));
+		if (n == 0)
+			(void)fprintf(outf, "%s: %s", argv0, buf);
+		else
+			(void)write(STDERR_FILENO, buf, strlen(buf));
+		return;
+	}
+
+
+	if (n != 0 && *archd.name) {
+		snprintf(buf, sizeof(buf), "Working on `%s' (%s)\n",
+		    archd.name, sizefmt(s1buf, sizeof(s1buf), archd.sb.st_size));
+		(void)write(STDERR_FILENO, buf, strlen(buf));
+	}
+
+
+	if (act == COPY) {
+		snprintf(buf, sizeof(buf),
+		    "%lu files in %s\n",
+		    (unsigned long)flcnt,
+		    timefmt(tbuf, sizeof(tbuf), flcnt, secs, "files"));
+	} else {
+		snprintf(buf, sizeof(buf),
+		    "%s vol %d, %lu files, %s read, %s written in %s\n",
+		    frmt->name, arvol-1, (unsigned long)flcnt,
+		    sizefmt(s1buf, sizeof(s1buf), rdcnt),
+		    sizefmt(s2buf, sizeof(s2buf), wrcnt),
+		    timefmt(tbuf, sizeof(tbuf), rdcnt + wrcnt, secs, "bytes"));
+	}
+	if (n == 0)
+		(void)fprintf(outf, "%s: %s", argv0, buf);
+	else
+		(void)write(STDERR_FILENO, buf, strlen(buf));
+}
+
+/*
+ * ar_dochdir(name)
+ *	change directory to name, and remember where we came from and
+ *	where we change to (for ar_open).
+ *
+ *	Maybe we could try to be smart and only do the actual chdir
+ *	when necessary to write a file read from the archive, but this
+ *	is not easy to get right given the pax code structure.
+ *
+ *	Be sure to not leak descriptors!
+ *
+ *	We are called N * M times when extracting, and N times when
+ *	writing archives, where
+ *	N:	number of -C options
+ *	M:	number of files in archive
+ *
+ * Returns 0 if all went well, else -1.
+ */
+
+int
+ar_dochdir(const char *name)
+{
+	/* First fdochdir() back... */
+	if (fdochdir(cwdfd) == -1)
+		return -1;
+	if (dochdir(name) == -1)
+		return -1;
+	return 0;
+}
diff --git a/bin/pax/ar_subs.c b/bin/pax/ar_subs.c
new file mode 100644
index 0000000..d4ba54b
--- /dev/null
+++ b/bin/pax/ar_subs.c
@@ -0,0 +1,1449 @@
+/* $NetBSD: ar_subs.c,v 1.56 2011/08/31 16:24:54 plunky Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)ar_subs.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: ar_subs.c,v 1.56 2011/08/31 16:24:54 plunky Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "pat_rep.h" +#include "extern.h" + +static int path_check(ARCHD *, int); +static int wr_archive(ARCHD *, int is_app); +static int get_arc(void); +static int next_head(ARCHD *); +#if !HAVE_NBTOOL_CONFIG_H +static int fdochroot(int); +#endif +extern sigset_t s_mask; + +/* + * Routines which control the overall operation modes of pax as specified by + * the user: list, append, read ... 
+ */
+
+static char	hdbuf[BLKMULT];		/* space for archive header on read */
+u_long flcnt;				/* number of files processed */
+ARCHD archd;				/* entry shared by list/extract/append/copy */
+
+static char	cwdpath[MAXPATHLEN];	/* current working directory path */
+static size_t	cwdpathlen;		/* current working directory path len */
+
+/*
+ * updatepath()
+ *	refresh cwdpath/cwdpathlen from getcwd(3).  path_check() compares
+ *	resolved member paths against this cached value, so it has to be
+ *	refreshed after every successful change of directory or root.
+ * Return:
+ *	0 if the path was stored, -1 (after a warning) if getcwd() failed
+ */
+int
+updatepath(void)
+{
+	if (getcwd(cwdpath, sizeof(cwdpath)) == NULL) {
+		syswarn(1, errno, "Cannot get working directory");
+		return -1;
+	}
+	cwdpathlen = strlen(cwdpath);
+	return 0;
+}
+
+/*
+ * fdochdir()
+ *	fchdir(2) to the open directory fcwd and refresh the cached path.
+ * Return:
+ *	0 on success, -1 (after a warning) if fchdir() or getcwd() failed
+ */
+int
+fdochdir(int fcwd)
+{
+	if (fchdir(fcwd) == -1) {
+		syswarn(1, errno, "Cannot chdir to `.'");
+		return -1;
+	}
+	return updatepath();
+}
+
+/*
+ * dochdir()
+ *	chdir(2) to name and refresh the cached path.  A failed chdir() is
+ *	reported to the caller, the same way fdochdir() reports a failed
+ *	fchdir(); the directory did not change in that case, so the cached
+ *	path is left alone.
+ * Return:
+ *	0 on success, -1 (after a warning) if chdir() or getcwd() failed
+ */
+int
+dochdir(const char *name)
+{
+	if (chdir(name) == -1) {
+		syswarn(1, errno, "Cannot chdir to `%s'", name);
+		return -1;
+	}
+	return updatepath();
+}
+
+#if !HAVE_NBTOOL_CONFIG_H
+/*
+ * fdochroot()
+ *	fchroot() to the open directory fcwd and refresh the cached path.
+ * Return:
+ *	0 on success, -1 (after a warning) if fchroot() or getcwd() failed
+ */
+static int
+fdochroot(int fcwd)
+{
+	if (fchroot(fcwd) != 0) {
+		syswarn(1, errno, "Can't fchroot to \".\"");
+		return -1;
+	}
+	return updatepath();
+}
+#endif
+
+/*
+ * mkdir(), but if we failed, check if someone else made it for us
+ * already and don't error out.
+ */ +int +domkdir(const char *fname, mode_t mode) +{ + int error; + struct stat sb; + + if ((error = mkdir(fname, mode)) != -1) + return error; + + switch (errno) { + case EISDIR: + return 0; + case EEXIST: + case EACCES: + case ENOSYS: /* Grr Solaris */ + case EROFS: + error = errno; + if (stat(fname, &sb) != -1 && S_ISDIR(sb.st_mode)) + return 0; + errno = error; + /*FALLTHROUGH*/ + default: + return -1; + } +} + +static int +path_check(ARCHD *arcn, int level) +{ + char buf[MAXPATHLEN]; + char *p; + + if ((p = strrchr(arcn->name, '/')) == NULL) + return 0; + *p = '\0'; + + if (realpath(arcn->name, buf) == NULL) { + int error; + error = path_check(arcn, level + 1); + *p = '/'; + if (error == 0) + return 0; + if (level == 0) + syswarn(1, 0, "Cannot resolve `%s'", arcn->name); + return -1; + } + if (strncmp(buf, cwdpath, cwdpathlen) != 0) { + *p = '/'; + syswarn(1, 0, "Attempt to write file `%s' that resolves into " + "`%s/%s' outside current working directory `%s' ignored", + arcn->name, buf, p + 1, cwdpath); + return -1; + } + *p = '/'; + return 0; +} + +/* + * list() + * list the contents of an archive which match user supplied pattern(s) + * (if no pattern is supplied, list entire contents). + */ + +int +list(void) +{ + ARCHD *arcn; + int res; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine. We + * also save current time for ls_list() so we do not make a system + * call for each file we need to print. If verbose (vflag) start up + * the name and group caches. 
+ */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0)) + return 1; + + now = time(NULL); + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { + /* + * we need to read, to get the real filename + */ + off_t cnt; + if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt)) + (void)rd_skip(cnt + arcn->pad); + continue; + } + + /* + * check for pattern, and user specified options match. + * When all patterns are matched we are done. + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res == 0) && (sel_chk(arcn) == 0)) { + /* + * pattern resulted in a selected file + */ + if (pat_sel(arcn) < 0) + break; + + /* + * modify the name as requested by the user if name + * survives modification, do a listing of the file + */ + if ((res = mod_name(arcn, RENM)) < 0) + break; + if (res == 0) { + if (arcn->name[0] == '/' && !check_Aflag()) { + memmove(arcn->name, arcn->name + 1, + strlen(arcn->name)); + } + ls_list(arcn, now, stdout); + } + /* + * if there's an error writing to stdout then we must + * stop now -- we're probably writing to a pipe that + * has been closed by the reader. 
+ */ + if (ferror(stdout)) { + syswarn(1, errno, "Listing incomplete."); + break; + } + } + /* + * skip to next archive format header using values calculated + * by the format header read routine + */ + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + } + + /* + * all done, let format have a chance to cleanup, and make sure that + * the patterns supplied by the user were all matched + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + pat_chk(); + + return 0; +} + +/* + * extract() + * extract the member(s) of an archive as specified by user supplied + * pattern(s) (no patterns extracts all members) + */ + +int +extract(void) +{ + ARCHD *arcn; + int res; + off_t cnt; + struct stat sb; + int fd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine; + * start up the directory modification time and access mode database + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0) || (dir_start() < 0)) + return 1; + + now = time(NULL); +#if !HAVE_NBTOOL_CONFIG_H + if (do_chroot) + (void)fdochroot(cwdfd); +#endif + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return 1; + + /* + * step through each entry on the archive until the format read routine + * says it is done + */ + while (next_head(arcn) == 0) { + int write_to_hard_link = 0; + + if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { + /* + * we need to read, to get the real filename + */ + if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt)) + (void)rd_skip(cnt + arcn->pad); + continue; + } + + /* + * check for pattern, and user specified options match. 
When + * all the patterns are matched we are done + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res > 0) || (sel_chk(arcn) != 0)) { + /* + * file is not selected. skip past any file + * data and padding and go back for the next + * archive member + */ + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + if (kflag && (lstat(arcn->name, &sb) == 0)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * with -u or -D only extract when the archive member is newer + * than the file with the same name in the file system (no + * test of being the same type is required). + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this operation can be confusing to the + * user who might expect the test to be done on an existing + * file AFTER the name mod. In honesty the pax spec is probably + * flawed in this respect. ignore this for GNU long links. + */ + if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + /* + * this archive member is now been selected. modify the name. + */ + if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0)) + break; + if (res > 0) { + /* + * a bad name mod, skip and purge name from link table + */ + purg_lnk(arcn); + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + if (arcn->name[0] == '/' && !check_Aflag()) { + memmove(arcn->name, arcn->name + 1, strlen(arcn->name)); + } + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip; ignore this for GNU long links. 
+ */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL) && + !to_stdout) + dochdir(arcn->pat->chdname); + + if (secure && path_check(arcn, 0) != 0) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + + /* + * all ok, extract this member based on type + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * process archive members that are not regular files. + * throw out padding and any data that might follow the + * header (as determined by the format). + */ + if ((arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) + res = lnk_creat(arcn, &write_to_hard_link); + else + res = node_creat(arcn); + + if (!write_to_hard_link) { + (void)rd_skip(arcn->skip + arcn->pad); + if (res < 0) + purg_lnk(arcn); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + } + if (to_stdout) + fd = STDOUT_FILENO; + else { + /* + * We have a file with data here. If we cannot create + * it, skip over the data and purge the name from hard + * link table. 
+ */ + if ((fd = file_creat(arcn, write_to_hard_link)) < 0) { + (void)fflush(listf); + (void)rd_skip(arcn->skip + arcn->pad); + purg_lnk(arcn); + continue; + } + } + /* + * extract the file from the archive and skip over padding and + * any unprocessed data + */ + res = (*frmt->rd_data)(arcn, fd, &cnt); + if (!to_stdout) + file_close(arcn, fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (!res) + (void)rd_skip(cnt + arcn->pad); + + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + fdochdir(cwdfd); + } + + /* + * all done, restore directory modes and times as required; make sure + * all patterns supplied by the user were matched; block off signals + * to avoid chance for multiple entry into the cleanup code. + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + proc_dir(); + pat_chk(); + + return 0; +} + +/* + * wr_archive() + * Write an archive. used in both creating a new archive and appends on + * previously written archive. + */ + +static int +wr_archive(ARCHD *arcn, int is_app) +{ + int res; + int hlk; + int wr_one; + off_t cnt; + int (*wrf)(ARCHD *); + int fd = -1; + time_t now; + + /* + * if this format supports hard link storage, start up the database + * that detects them. + */ + if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) + return 1; + + /* + * start up the file traversal code and format specific write + */ + if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0)) + return 1; + wrf = frmt->wr; + + now = time(NULL); + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. 
+ */ + if (iflag && (name_start() < 0)) + return 1; + + /* + * if this is not append, and there are no files, we do no write a trailer + */ + wr_one = is_app; + + /* + * while there are files to archive, process them one at at time + */ + while (next_file(arcn) == 0) { + /* + * check if this file meets user specified options match. + */ + if (sel_chk(arcn) != 0) + continue; + /* + * Here we handle the exclusion -X gnu style patterns which + * are implemented like a pattern list. We don't modify the + * name as this will be done below again, and we don't want + * to double modify it. + */ + if ((res = mod_name(arcn, 0)) < 0) + break; + if (res == 1) + continue; + fd = -1; + if (uflag) { + /* + * only archive if this file is newer than a file with + * the same name that is already stored on the archive + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) + continue; + } + + /* + * this file is considered selected now. see if this is a hard + * link to a file already stored + */ + ftree_sel(arcn); + if (hlk && (chk_lnk(arcn) < 0)) + break; + + if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || + (arcn->type == PAX_CTG)) { + /* + * we will have to read this file. by opening it now we + * can avoid writing a header to the archive for a file + * we were later unable to read (we also purge it from + * the link table). 
+ */ + if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + } + + /* + * Now modify the name as requested by the user + */ + if ((res = mod_name(arcn, RENM)) < 0) { + /* + * name modification says to skip this file, close the + * file and purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + break; + } + + if (arcn->name[0] == '/' && !check_Aflag()) { + memmove(arcn->name, arcn->name + 1, strlen(arcn->name)); + } + + if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { + /* + * unable to obtain the crc we need, close the file, + * purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + ++flcnt; + + /* + * looks safe to store the file, have the format specific + * routine write routine store the file header on the archive + */ + if ((res = (*wrf)(arcn)) < 0) { + rdfile_close(arcn, &fd); + break; + } + wr_one = 1; + if (res > 0) { + /* + * format write says no file data needs to be stored + * so we are done messing with this file + */ + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + rdfile_close(arcn, &fd); + continue; + } + + /* + * Add file data to the archive, quit on write error. if we + * cannot write the entire file contents to the archive we + * must pad the archive to replace the missing file data + * (otherwise during an extract the file header for the file + * which FOLLOWS this one will not be where we expect it to + * be). 
+ */ + res = (*frmt->wr_data)(arcn, fd, &cnt); + rdfile_close(arcn, &fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (res < 0) + break; + + /* + * pad as required, cnt is number of bytes not written + */ + if (((cnt > 0) && (wr_skip(cnt) < 0)) || + ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) + break; + } + + /* + * tell format to write trailer; pad to block boundary; reset directory + * mode/access times, and check if all patterns supplied by the user + * were matched. block off signals to avoid chance for multiple entry + * into the cleanup code + */ + if (wr_one) { + (*frmt->end_wr)(); + wr_fin(); + } + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + if (tflag) + proc_dir(); + ftree_chk(); + + return 0; +} + +/* + * append() + * Add file to previously written archive. Archive format specified by the + * user must agree with archive. The archive is read first to collect + * modification times (if -u) and locate the archive trailer. The archive + * is positioned in front of the record with the trailer and wr_archive() + * is called to add the new members. + * PAX IMPLEMENTATION DETAIL NOTE: + * -u is implemented by adding the new members to the end of the archive. + * Care is taken so that these do not end up as links to the older + * version of the same file already stored in the archive. It is expected + * when extraction occurs these newer versions will over-write the older + * ones stored "earlier" in the archive (this may be a bad assumption as + * it depends on the implementation of the program doing the extraction). + * It is really difficult to splice in members without either re-writing + * the entire archive (from the point were the old version was), or having + * assistance of the format specification in terms of a special update + * header that invalidates a previous archive record. The posix spec left + * the method used to implement -u unspecified. 
This pax is able to + * over write existing files that it creates. + */ + +int +append(void) +{ + ARCHD *arcn; + int res; + FSUB *orgfrmt; + int udev; + off_t tlen; + + arcn = &archd; + orgfrmt = frmt; + + /* + * Do not allow an append operation if the actual archive is of a + * different format than the user specified format. + */ + if (get_arc() < 0) + return 1; + if ((orgfrmt != NULL) && (orgfrmt != frmt)) { + tty_warn(1, "Cannot mix current archive format %s with %s", + frmt->name, orgfrmt->name); + return 1; + } + + /* + * pass the format any options and start up format + */ + if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) + return 1; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files we see. + */ + if (uflag && (ftime_start() < 0)) + return 1; + + /* + * some archive formats encode hard links by recording the device and + * file serial number (inode) but copy the file anyway (multiple times) + * to the archive. When we append, we run the risk that newly added + * files may have the same device and inode numbers as those recorded + * on the archive but during a previous run. If this happens, when the + * archive is extracted we get INCORRECT hard links. We avoid this by + * remapping the device numbers so that newly added files will never + * use the same device number as one found on the archive. remapping + * allows new members to safely have links among themselves. remapping + * also avoids problems with file inode (serial number) truncations + * when the inode number is larger than storage space in the archive + * header. See the remap routines for more details. + */ + if ((udev = frmt->udev) && (dev_start() < 0)) + return 1; + + /* + * reading the archive may take a long time. 
If verbose tell the user + */ + if (vflag || Vflag) { + (void)fprintf(listf, + "%s: Reading archive to position at the end...", argv0); + vfpart = 1; + } + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check if this file meets user specified options. + */ + if (sel_chk(arcn) != 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + + if (uflag) { + /* + * see if this is the newest version of this file has + * already been seen, if so skip. + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + } + + /* + * Store this device number. Device numbers seen during the + * read phase of append will cause newly appended files with a + * device number seen in the old part of the archive to be + * remapped to an unused device number. + */ + if ((udev && (add_dev(arcn) < 0)) || + (rd_skip(arcn->skip + arcn->pad) == 1)) + break; + } + + /* + * done, finish up read and get the number of bytes to back up so we + * can add new members. The format might have used the hard link table, + * purge it. + */ + tlen = (*frmt->end_rd)(); + lnk_end(); + + /* + * try to position for write, if this fails quit. if any error occurs, + * we will refuse to write + */ + if (appnd_start(tlen) < 0) + return 1; + + /* + * tell the user we are done reading. + */ + if ((vflag || Vflag) && vfpart) { + (void)safe_print("done.\n", listf); + vfpart = 0; + } + + /* + * go to the writing phase to add the new members + */ + res = wr_archive(arcn, 1); + if (res == 1) { + /* + * wr_archive failed in some way, but before any files were + * added. These are the only steps needed to cleanup (and + * not truncate the archive). 
+ */ + wr_fin(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + } + return res; +} + +/* + * archive() + * write a new archive + */ + +int +archive(void) +{ + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files; set up for writing; pass the format any + * options write the archive + */ + if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) + return 1; + if ((*frmt->options)() < 0) + return 1; + + return wr_archive(&archd, 0); +} + +/* + * copy() + * copy files from one part of the file system to another. this does not + * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an + * archive was written and then extracted in the destination directory + * (except the files are forced to be under the destination directory). + */ + +int +copy(void) +{ + ARCHD *arcn; + int res; + int fddest; + char *dest_pt; + size_t dlen; + size_t drem; + int fdsrc = -1; + struct stat sb; + char dirbuf[PAXPATHLEN+1]; + + arcn = &archd; + /* + * set up the destination dir path and make sure it is a directory. 
We + * make sure we have a trailing / on the destination + */ + dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf)); + if (dlen >= sizeof(dirbuf) || + (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) { + tty_warn(1, "directory name is too long %s", dirptr); + return 1; + } + dest_pt = dirbuf + dlen; + if (*(dest_pt-1) != '/') { + *dest_pt++ = '/'; + ++dlen; + } + *dest_pt = '\0'; + drem = PAXPATHLEN - dlen; + + if (stat(dirptr, &sb) < 0) { + syswarn(1, errno, "Cannot access destination directory %s", + dirptr); + return 1; + } + if (!S_ISDIR(sb.st_mode)) { + tty_warn(1, "Destination is not a directory %s", dirptr); + return 1; + } + + /* + * start up the hard link table; file traversal routines and the + * modification time and access mode database + */ + if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) + return 1; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return 1; + + /* + * set up to cp file trees + */ + cp_start(); + + /* + * while there are files to archive, process them + */ + while (next_file(arcn) == 0) { + fdsrc = -1; + + /* + * check if this file meets user specified options + */ + if (sel_chk(arcn) != 0) + continue; + + /* + * if there is already a file in the destination directory with + * the same name and it is newer, skip the one stored on the + * archive. + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this can be confusing to the user who + * might expect the test to be done on an existing file AFTER + * the name mod. 
In honesty the pax spec is probably flawed in + * this respect + */ + if (uflag || Dflag) { + /* + * create the destination name + */ + if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'), + drem + 1) > drem) { + tty_warn(1, "Destination pathname too long %s", + arcn->name); + continue; + } + + /* + * if existing file is same age or newer skip + */ + res = lstat(dirbuf, &sb); + *dest_pt = '\0'; + + if (res == 0) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime<=sb.st_mtime) && + (arcn->sb.st_ctime<=sb.st_ctime)) + continue; + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + } + + /* + * this file is considered selected. See if this is a hard link + * to a previous file; modify the name as requested by the + * user; set the final destination. + */ + ftree_sel(arcn); + if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0)) + break; + if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { + /* + * skip file, purge from link table + */ + purg_lnk(arcn); + continue; + } + + /* + * Non standard -Y and -Z flag. When the exisiting file is + * same age or newer skip + */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) + continue; + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + + if (vflag) { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + ++flcnt; + + /* + * try to create a hard link to the src file if requested + * but make sure we are not trying to overwrite ourselves. 
+ */ + if (lflag) + res = cross_lnk(arcn); + else + res = chk_same(arcn); + if (res <= 0) { + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to create a new file + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * create a link or special file + */ + if ((arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) { + int payload; + + res = lnk_creat(arcn, &payload); + } else { + res = node_creat(arcn); + } + if (res < 0) + purg_lnk(arcn); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to copy a regular file to the destination directory. + * first open source file and then create the destination file + */ + if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + if ((fddest = file_creat(arcn, 0)) < 0) { + rdfile_close(arcn, &fdsrc); + purg_lnk(arcn); + continue; + } + + /* + * copy source file data to the destination file + */ + cp_file(arcn, fdsrc, fddest); + file_close(arcn, fddest); + rdfile_close(arcn, &fdsrc); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + } + + /* + * restore directory modes and times as required; make sure all + * patterns were selected block off signals to avoid chance for + * multiple entry into the cleanup code. + */ + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + proc_dir(); + ftree_chk(); + + return 0; +} + +/* + * next_head() + * try to find a valid header in the archive. Uses format specific + * routines to extract the header and id the trailer. Trailers may be + * located within a valid header or in an invalid header (the location + * is format specific. The inhead field from the option table tells us + * where to look for the trailer). + * We keep reading (and resyncing) until we get enough contiguous data + * to check for a header. 
If we cannot find one, we shift by a byte + * add a new byte from the archive to the end of the buffer and try again. + * If we get a read error, we throw out what we have (as we must have + * contiguous data) and start over again. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if we got a header, -1 if we are unable to ever find another one + * (we reached the end of input, or we reached the limit on retries. see + * the specs for rd_wrbuf() for more details) + */ + +static int +next_head(ARCHD *arcn) +{ + int ret; + char *hdend; + int res; + int shftsz; + int hsz; + int in_resync = 0; /* set when we are in resync mode */ + int cnt = 0; /* counter for trailer function */ + int first = 1; /* on 1st read, EOF isn't premature. */ + + /* + * set up initial conditions, we want a whole frmt->hsz block as we + * have no data yet. + */ + res = hsz = frmt->hsz; + hdend = hdbuf; + shftsz = hsz - 1; + for(;;) { + /* + * keep looping until we get a contiguous FULL buffer + * (frmt->hsz is the proper size) + */ + for (;;) { + if ((ret = rd_wrbuf(hdend, res)) == res) + break; + + /* + * If we read 0 bytes (EOF) from an archive when we + * expect to find a header, we have stepped upon + * an archive without the customary block of zeroes + * end marker. It's just stupid to error out on + * them, so exit gracefully. + */ + if (first && ret == 0) + return -1; + first = 0; + + /* + * some kind of archive read problem, try to resync the + * storage device, better give the user the bad news. + */ + if ((ret == 0) || (rd_sync() < 0)) { + tty_warn(1, + "Premature end of file on archive read"); + return -1; + } + if (!in_resync) { + if (act == APPND) { + tty_warn(1, + "Archive I/O error, cannot continue"); + return -1; + } + tty_warn(1, + "Archive I/O error. Trying to recover."); + ++in_resync; + } + + /* + * oh well, throw it all out and start over + */ + res = hsz; + hdend = hdbuf; + } + + /* + * ok we have a contiguous buffer of the right size. 
Call the + * format read routine. If this was not a valid header and this + * format stores trailers outside of the header, call the + * format specific trailer routine to check for a trailer. We + * have to watch out that we do not mis-identify file data or + * block padding as a header or trailer. Format specific + * trailer functions must NOT check for the trailer while we + * are running in resync mode. Some trailer functions may tell + * us that this block cannot contain a valid header either, so + * we then throw out the entire block and start over. + */ + if ((*frmt->rd)(arcn, hdbuf) == 0) + break; + + if (!frmt->inhead) { + /* + * this format has trailers outside of valid headers + */ + if ((ret = (*frmt->trail)(hdbuf,in_resync,&cnt)) == 0){ + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return -1; + } + + if (ret == 1) { + /* + * we are in resync and we were told to throw + * the whole block out because none of the + * bytes in this block can be used to form a + * valid header + */ + res = hsz; + hdend = hdbuf; + continue; + } + } + + /* + * Brute force section. + * not a valid header. We may be able to find a header yet. So + * we shift over by one byte, and set up to read one byte at a + * time from the archive and place it at the end of the buffer. + * We will keep moving byte at a time until we find a header or + * get a read error and have to start over. + */ + if (!in_resync) { + if (act == APPND) { + tty_warn(1, + "Unable to append, archive header flaw"); + return -1; + } + tty_warn(1, + "Invalid header, starting valid header search."); + ++in_resync; + } + memmove(hdbuf, hdbuf+1, shftsz); + res = 1; + hdend = hdbuf + shftsz; + } + + /* + * ok got a valid header, check for trailer if format encodes it in the + * the header. NOTE: the parameters are different than trailer routines + * which encode trailers outside of the header! 
+ */ + if (frmt->inhead && ((*frmt->subtrail)(arcn) == 0)) { + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return -1; + } + + ++flcnt; + return 0; +} + +/* + * get_arc() + * Figure out what format an archive is. Handles archive with flaws by + * brute force searches for a legal header in any supported format. The + * format id routines have to be careful to NOT mis-identify a format. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if archive found -1 otherwise + */ + +static int +get_arc(void) +{ + int i; + int hdsz = 0; + int res; + int minhd = BLKMULT; + char *hdend; + int notice = 0; + + /* + * find the smallest header size in all archive formats and then set up + * to read the archive. + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].hsz < minhd) + minhd = fsub[ford[i]].hsz; + } + if (rd_start() < 0) + return -1; + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + for(;;) { + for (;;) { + /* + * fill the buffer with at least the smallest header + */ + i = rd_wrbuf(hdend, res); + if (i > 0) + hdsz += i; + if (hdsz >= minhd) + break; + + /* + * if we cannot recover from a read error quit + */ + if ((i == 0) || (rd_sync() < 0)) + goto out; + + /* + * when we get an error none of the data we already + * have can be used to create a legal header (we just + * got an error in the middle), so we throw it all out + * and refill the buffer with fresh data. + */ + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + if (!notice) { + if (act == APPND) + return -1; + tty_warn(1, + "Cannot identify format. Searching..."); + ++notice; + } + } + + /* + * we have at least the size of the smallest header in any + * archive format. Look to see if we have a match. The array + * ford[] is used to specify the header id order to reduce the + * chance of incorrectly id'ing a valid header (some formats + * may be subsets of each other and the order would then be + * important). 
+ */ + for (i = 0; ford[i] >= 0; ++i) { + if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0) + continue; + frmt = &(fsub[ford[i]]); + /* + * yuck, to avoid slow special case code in the extract + * routines, just push this header back as if it was + * not seen. We have left extra space at start of the + * buffer for this purpose. This is a bit ugly, but + * adding all the special case code is far worse. + */ + pback(hdbuf, hdsz); + return 0; + } + + /* + * We have a flawed archive, no match. we start searching, but + * we never allow additions to flawed archives + */ + if (!notice) { + if (act == APPND) + return -1; + tty_warn(1, "Cannot identify format. Searching..."); + ++notice; + } + + /* + * brute force search for a header that we can id. + * we shift through byte at a time. this is slow, but we cannot + * determine the nature of the flaw in the archive in a + * portable manner + */ + if (--hdsz > 0) { + memmove(hdbuf, hdbuf+1, hdsz); + res = BLKMULT - hdsz; + hdend = hdbuf + hdsz; + } else { + res = BLKMULT; + hdend = hdbuf; + hdsz = 0; + } + } + + out: + /* + * we cannot find a header, bow, apologize and quit + */ + tty_warn(1, "Sorry, unable to determine archive format."); + return -1; +} diff --git a/bin/pax/buf_subs.c b/bin/pax/buf_subs.c new file mode 100644 index 0000000..e4b97af --- /dev/null +++ b/bin/pax/buf_subs.c @@ -0,0 +1,1022 @@ +/* $NetBSD: buf_subs.c,v 1.29 2018/03/19 03:11:39 msaitoh Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)buf_subs.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: buf_subs.c,v 1.29 2018/03/19 03:11:39 msaitoh Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" + +/* + * routines which implement archive and file buffering + */ + +#define MINFBSZ 512 /* default block size for hole detect */ +#define MAXFLT 10 /* default media read error limit */ + +/* + * Need to change bufmem to dynamic allocation when the upper + * limit on blocking size is removed (though that will violate pax spec) + * MAXBLK define and tests will also need to be updated. + */ +static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */ +static char *buf; /* normal start of i/o buffer */ +static char *bufend; /* end or last char in i/o buffer */ +static char *bufpt; /* read/write point in i/o buffer */ +int blksz = MAXBLK; /* block input/output size in bytes */ +int wrblksz; /* user spec output size in bytes */ +int maxflt = MAXFLT; /* MAX consecutive media errors */ +int rdblksz; /* first read blksize (tapes only) */ +off_t wrlimit; /* # of bytes written per archive vol */ +off_t wrcnt; /* # of bytes written on current vol */ +off_t rdcnt; /* # of bytes read on current vol */ + +/* + * wr_start() + * set up the buffering system to operate in a write mode + * Return: + * 0 if ok, -1 if the user specified write block size violates pax spec + */ + +int +wr_start(void) +{ + buf = &(bufmem[BLKMULT]); + /* + * Check to make sure the write block size meets pax specs. If the user + * does not specify a blocksize, we use the format default blocksize. + * We must be picky on writes, so we do not allow the user to create an + * archive that might be hard to read elsewhere. 
If all ok, we then + * open the first archive volume + */ + if (!wrblksz) + wrblksz = frmt->bsz; + if (wrblksz > MAXBLK) { + tty_warn(1, "Write block size of %d too large, maximum is: %d", + wrblksz, MAXBLK); + return -1; + } + if (wrblksz % BLKMULT) { + tty_warn(1, "Write block size of %d is not a %d byte multiple", + wrblksz, BLKMULT); + return -1; + } + + /* + * we only allow wrblksz to be used with all archive operations + */ + blksz = rdblksz = wrblksz; + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return -1; + wrcnt = 0; + bufend = buf + wrblksz; + bufpt = buf; + return 0; +} + +/* + * rd_start() + * set up buffering system to read an archive + * Return: + * 0 if ok, -1 otherwise + */ + +int +rd_start(void) +{ + /* + * leave space for the header pushback (see get_arc()). If we are + * going to append and user specified a write block size, check it + * right away + */ + buf = &(bufmem[BLKMULT]); + if ((act == APPND) && wrblksz) { + if (wrblksz > MAXBLK) { + tty_warn(1, + "Write block size %d too large, maximum is: %d", + wrblksz, MAXBLK); + return -1; + } + if (wrblksz % BLKMULT) { + tty_warn(1, + "Write block size %d is not a %d byte multiple", + wrblksz, BLKMULT); + return -1; + } + } + + /* + * open the archive + */ + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return -1; + bufend = buf + rdblksz; + bufpt = bufend; + rdcnt = 0; + return 0; +} + +/* + * cp_start() + * set up buffer system for copying within the file system + */ + +void +cp_start(void) +{ + buf = &(bufmem[BLKMULT]); + rdblksz = blksz = MAXBLK; +} + +/* + * appnd_start() + * Set up the buffering system to append new members to an archive that + * was just read. The last block(s) of an archive may contain a format + * specific trailer. To append a new member, this trailer has to be + * removed from the archive. The first byte of the trailer is replaced by + * the start of the header of the first file added to the archive. 
The + * format specific end read function tells us how many bytes to move + * backwards in the archive to be positioned BEFORE the trailer. Two + * different positions have to be adjusted, the O.S. file offset (e.g. the + * position of the tape head) and the write point within the data we have + * stored in the read (soon to become write) buffer. We may have to move + * back several records (the number depends on the size of the archive + * record and the size of the format trailer) to read up the record where + * the first byte of the trailer is recorded. Trailers may span (and + * overlap) record boundaries. + * We first calculate which record has the first byte of the trailer. We + * move the OS file offset back to the start of this record and read it + * up. We set the buffer write pointer to be at this byte (the byte where + * the trailer starts). We then move the OS file pointer back to the + * start of this record so a flush of this buffer will replace the record + * in the archive. + * A major problem is rewriting this last record. For archives stored + * on disk files, this is trivial. However, many devices are really picky + * about the conditions under which they will allow a write to occur. + * Often devices restrict the conditions where writes can be made, + * so it may not be feasable to append archives stored on all types of + * devices. + * Return: + * 0 for success, -1 for failure + */ + +int +appnd_start(off_t skcnt) +{ + int res; + off_t cnt; + + if (exit_val != 0) { + tty_warn(0, "Cannot append to an archive that may have flaws."); + return -1; + } + /* + * if the user did not specify a write blocksize, inherit the size used + * in the last archive volume read. (If a is set we still use rdblksz + * until next volume, cannot shift sizes within a single volume). 
+ */ + if (!wrblksz) + wrblksz = blksz = rdblksz; + else + blksz = rdblksz; + + /* + * make sure that this volume allows appends + */ + if (ar_app_ok() < 0) + return -1; + + /* + * Calculate bytes to move back and move in front of record where we + * need to start writing from. Remember we have to add in any padding + * that might be in the buffer after the trailer in the last block. We + * travel skcnt + padding ROUNDED UP to blksize. + */ + skcnt += bufend - bufpt; + if ((cnt = (skcnt/blksz) * blksz) < skcnt) + cnt += blksz; + if (ar_rev((off_t)cnt) < 0) + goto out; + + /* + * We may have gone too far if there is valid data in the block we are + * now in front of, read up the block and position the pointer after + * the valid data. + */ + if ((cnt -= skcnt) > 0) { + /* + * watch out for stupid tape drives. ar_rev() will set rdblksz + * to be real physical blocksize so we must loop until we get + * the old rdblksz (now in blksz). If ar_rev() fouls up the + * determination of the physical block size, we will fail. + */ + bufpt = buf; + bufend = buf + blksz; + while (bufpt < bufend) { + if ((res = ar_read(bufpt, rdblksz)) <= 0) + goto out; + bufpt += res; + } + if (ar_rev((off_t)(bufpt - buf)) < 0) + goto out; + bufpt = buf + cnt; + bufend = buf + blksz; + } else { + /* + * buffer is empty + */ + bufend = buf + blksz; + bufpt = buf; + } + rdblksz = blksz; + rdcnt -= skcnt; + wrcnt = 0; + + /* + * At this point we are ready to write. If the device requires special + * handling to write at a point were previously recorded data resides, + * that is handled in ar_set_wr(). From now on we operate under normal + * ARCHIVE mode (write) conditions + */ + if (ar_set_wr() < 0) + return -1; + act = ARCHIVE; + return 0; + + out: + tty_warn(1, "Unable to rewrite archive trailer, cannot append."); + return -1; +} + +/* + * rd_sync() + * A read error occurred on this archive volume. Resync the buffer and + * try to reset the device (if possible) so we can continue to read. 
Keep + * trying to do this until we get a valid read, or we reach the limit on + * consecutive read faults (at which point we give up). The user can + * adjust the read error limit through a command line option. + * Returns: + * 0 on success, and -1 on failure + */ + +int +rd_sync(void) +{ + int errcnt = 0; + int res; + + /* + * if the user says bail out on first fault, we are out of here... + */ + if (maxflt == 0) + return -1; + if (act == APPND) { + tty_warn(1, + "Unable to append when there are archive read errors."); + return -1; + } + + /* + * poke at device and try to get past media error + */ + if (ar_rdsync() < 0) { + if (ar_next() < 0) + return -1; + else + rdcnt = 0; + } + + for (;;) { + if ((res = ar_read(buf, blksz)) > 0) { + /* + * All right! got some data, fill that buffer + */ + bufpt = buf; + bufend = buf + res; + rdcnt += res; + return 0; + } + + /* + * Oh well, yet another failed read... + * if error limit reached, ditch. otherwise poke device to move past + * bad media and try again. if media is badly damaged, we ask + * the poor (and upset user at this point) for the next archive + * volume. remember the goal on reads is to get the most we + * can extract out of the archive. + */ + if ((maxflt > 0) && (++errcnt > maxflt)) + tty_warn(0, + "Archive read error limit (%d) reached",maxflt); + else if (ar_rdsync() == 0) + continue; + if (ar_next() < 0) + break; + rdcnt = 0; + errcnt = 0; + } + return -1; +} + +/* + * pback() + * push the data used during the archive id phase back into the I/O + * buffer. This is required as we cannot be sure that the header does NOT + * overlap a block boundary (as in the case we are trying to recover a + * flawed archived). This was not designed to be used for any other + * purpose. (What software engineering, HA!) + * WARNING: do not even THINK of pback greater than BLKMULT, unless the + * pback space is increased. 
+ */ + +void +pback(char *pt, int cnt) +{ + bufpt -= cnt; + memcpy(bufpt, pt, cnt); + return; +} + +/* + * rd_skip() + * skip forward in the archive during an archive read. Used to get quickly + * past file data and padding for files the user did NOT select. + * Return: + * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected. + */ + +int +rd_skip(off_t skcnt) +{ + off_t res; + off_t cnt; + off_t skipped = 0; + + /* + * consume what data we have in the buffer. If we have to move forward + * whole records, we call the low level skip function to see if we can + * move within the archive without doing the expensive reads on data we + * do not want. + */ + if (skcnt == 0) + return 0; + res = MIN((bufend - bufpt), skcnt); + bufpt += res; + skcnt -= res; + + /* + * if skcnt is now 0, then no additional i/o is needed + */ + if (skcnt == 0) + return 0; + + /* + * We have to read more, calculate complete and partial record reads + * based on rdblksz. we skip over "cnt" complete records + */ + res = skcnt%rdblksz; + cnt = (skcnt/rdblksz) * rdblksz; + + /* + * if the skip fails, we will have to resync. ar_fow will tell us + * how much it can skip over. We will have to read the rest. + */ + if (ar_fow(cnt, &skipped) < 0) + return -1; + res += cnt - skipped; + rdcnt += skipped; + + /* + * what is left we have to read (which may be the whole thing if + * ar_fow() told us the device can only read to skip records); + */ + while (res > 0L) { + cnt = bufend - bufpt; + /* + * if the read fails, we will have to resync + */ + if ((cnt <= 0) && ((cnt = buf_fill()) < 0)) + return -1; + if (cnt == 0) + return 1; + cnt = MIN(cnt, res); + bufpt += cnt; + res -= cnt; + } + return 0; +} + +/* + * wr_fin() + * flush out any data (and pad if required) the last block. We always pad + * with zero (even though we do not have to). Padding with 0 makes it a + * lot easier to recover if the archive is damaged. zero paddding SHOULD + * BE a requirement.... 
+ */ + +void +wr_fin(void) +{ + if (bufpt > buf) { + memset(bufpt, 0, bufend - bufpt); + bufpt = bufend; + (void)buf_flush(blksz); + } +} + +/* + * wr_rdbuf() + * fill the write buffer from data passed to it in a buffer (usually used + * by format specific write routines to pass a file header). On failure we + * punt. We do not allow the user to continue to write flawed archives. + * We assume these headers are not very large (the memory copy we use is + * a bit expensive). + * Return: + * 0 if buffer was filled ok, -1 o.w. (buffer flush failure) + */ + +int +wr_rdbuf(char *out, int outcnt) +{ + int cnt; + + /* + * while there is data to copy into the write buffer. when the + * write buffer fills, flush it to the archive and continue + */ + while (outcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return -1; + /* + * only move what we have space for + */ + cnt = MIN(cnt, outcnt); + memcpy(bufpt, out, cnt); + bufpt += cnt; + out += cnt; + outcnt -= cnt; + } + return 0; +} + +/* + * rd_wrbuf() + * copy from the read buffer into a supplied buffer a specified number of + * bytes. If the read buffer is empty fill it and continue to copy. + * usually used to obtain a file header for processing by a format + * specific read routine. + * Return + * number of bytes copied to the buffer, 0 indicates EOF on archive volume, + * -1 is a read error + */ + +int +rd_wrbuf(char *in, int cpcnt) +{ + int res; + int cnt; + int incnt = cpcnt; + + /* + * loop until we fill the buffer with the requested number of bytes + */ + while (incnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) { + /* + * read error, return what we got (or the error if + * no data was copied). 
The caller must know that an + * error occurred and has the best knowledge what to + * do with it + */ + if ((res = cpcnt - incnt) > 0) + return res; + return cnt; + } + + /* + * calculate how much data to copy based on whats left and + * state of buffer + */ + cnt = MIN(cnt, incnt); + memcpy(in, bufpt, cnt); + bufpt += cnt; + incnt -= cnt; + in += cnt; + } + return cpcnt; +} + +/* + * wr_skip() + * skip forward during a write. In other words add padding to the file. + * we add zero filled padding as it makes flawed archives much easier to + * recover from. the caller tells us how many bytes of padding to add + * This routine was not designed to add HUGE amount of padding, just small + * amounts (a few 512 byte blocks at most) + * Return: + * 0 if ok, -1 if there was a buf_flush failure + */ + +int +wr_skip(off_t skcnt) +{ + int cnt; + + /* + * loop while there is more padding to add + */ + while (skcnt > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return -1; + cnt = MIN(cnt, skcnt); + memset(bufpt, 0, cnt); + bufpt += cnt; + skcnt -= cnt; + } + return 0; +} + +/* + * wr_rdfile() + * fill write buffer with the contents of a file. We are passed an open + * file descriptor to the file an the archive structure that describes the + * file we are storing. The variable "left" is modified to contain the + * number of bytes of the file we were NOT able to write to the archive. + * it is important that we always write EXACTLY the number of bytes that + * the format specific write routine told us to. The file can also get + * bigger, so reading to the end of file would create an improper archive, + * we just detect this case and warn the user. We never create a bad + * archive if we can avoid it. Of course trying to archive files that are + * active is asking for trouble. It we fail, we pass back how much we + * could NOT copy and let the caller deal with it. + * Return: + * 0 ok, -1 if archive write failure. 
a short read of the file returns a + * 0, but "left" is set to be greater than zero. + */ + +int +wr_rdfile(ARCHD *arcn, int ifd, off_t *left) +{ + int cnt; + int res = 0; + off_t size = arcn->sb.st_size; + struct stat origsb, sb; + + /* + * by default, remember the previously obtained stat information + * (in arcn->sb) for comparing the mtime after reading. + * if Mflag is set, use the actual mtime instead. + */ + origsb = arcn->sb; + if (Mflag && (fstat(ifd, &origsb) < 0)) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + + /* + * while there are more bytes to write + */ + while (size > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) { + *left = size; + return -1; + } + cnt = MIN(cnt, size); + if ((res = read_with_restart(ifd, bufpt, cnt)) <= 0) + break; + size -= res; + bufpt += res; + } + + /* + * better check the file did not change during this operation + * or the file read failed. + */ + if (res < 0) + syswarn(1, errno, "Read fault on %s", arcn->org_name); + else if (size != 0L) + tty_warn(1, "File changed size during read %s", arcn->org_name); + else if (fstat(ifd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (origsb.st_mtime != sb.st_mtime) + tty_warn(1, "File %s was modified during copy to archive", + arcn->org_name); + *left = size; + return 0; +} + +/* + * rd_wrfile() + * extract the contents of a file from the archive. If we are unable to + * extract the entire file (due to failure to write the file) we return + * the numbers of bytes we did NOT process. This way the caller knows how + * many bytes to skip past to find the next archive header. If the failure + * was due to an archive read, we will catch that when we try to skip. If + * the format supplies a file data crc value, we calculate the actual crc + * so that it can be compared to the value stored in the header + * NOTE: + * We call a special function to write the file. 
This function attempts to + * restore file holes (blocks of zeros) into the file. When files are + * sparse this saves space, and is a LOT faster. For non sparse files + * the performance hit is small. As of this writing, no archive supports + * information on where the file holes are. + * Return: + * 0 ok, -1 if archive read failure. if we cannot write the entire file, + * we return a 0 but "left" is set to be the amount unwritten + */ + +int +rd_wrfile(ARCHD *arcn, int ofd, off_t *left) +{ + int cnt = 0; + off_t size = arcn->sb.st_size; + int res = 0; + char *fnm = arcn->name; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + u_long crc = 0L; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (ofd < 0) + sz = PAXPATHLEN+1; + else if (fstat(ofd, &sb) == 0) { + if (sb.st_blksize > 0) + sz = (int)sb.st_blksize; + } else + syswarn(0, errno, + "Unable to obtain block size for file %s", fnm); + rem = sz; + *left = 0L; + + /* + * Copy the archive to the file the number of bytes specified. We have + * to assume that we want to recover file holes as none of the archive + * formats can record the location of file holes. + */ + while (size > 0L) { + cnt = bufend - bufpt; + /* + * if we get a read error, we do not want to skip, as we may + * miss a header, so we do not set left, but if we get a write + * error, we do want to skip over the unprocessed data. + */ + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) + break; + cnt = MIN(cnt, size); + if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) { + *left = size; + break; + } + + if (docrc) { + /* + * update the actual crc value + */ + cnt = res; + while (--cnt >= 0) + crc += *bufpt++ & 0xff; + } else + bufpt += res; + size -= res; + } + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. 
just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (ofd >= 0 && isem && (arcn->sb.st_size > 0L)) + file_flush(ofd, fnm, isem); + + /* + * if we failed from archive read, we do not want to skip + */ + if ((size > 0L) && (*left == 0L)) + return -1; + + /* + * some formats record a crc on file data. If so, then we compare the + * calculated crc to the crc stored in the archive + */ + if (docrc && (size == 0L) && (arcn->crc != crc)) + tty_warn(1,"Actual crc does not match expected crc %s", + arcn->name); + return 0; +} + +/* + * cp_file() + * copy the contents of one file to another. used during -rw phase of pax + * just as in rd_wrfile() we use a special write function to write the + * destination file so we can properly copy files with holes. + */ + +void +cp_file(ARCHD *arcn, int fd1, int fd2) +{ + int cnt; + off_t cpcnt = 0L; + int res = 0; + char *fnm = arcn->name; + int no_hole = 0; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb, origsb; + + /* + * check for holes in the source file. If none, we will use regular + * write instead of file write. + */ + if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size) + ++no_hole; + + /* + * by default, remember the previously obtained stat information + * (in arcn->sb) for comparing the mtime after reading. + * if Mflag is set, use the actual mtime instead. 
+ */ + origsb = arcn->sb; + if (Mflag && (fstat(fd1, &origsb) < 0)) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(fd2, &sb) == 0) { + if (sb.st_blksize > 0) + sz = sb.st_blksize; + } else + syswarn(0, errno, + "Unable to obtain block size for file %s", fnm); + rem = sz; + + /* + * read the source file and copy to destination file until EOF + */ + for(;;) { + if ((cnt = read_with_restart(fd1, buf, blksz)) <= 0) + break; + if (no_hole) + res = xwrite(fd2, buf, cnt); + else + res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm); + if (res != cnt) + break; + cpcnt += cnt; + } + + /* + * check to make sure the copy is valid. + */ + if (res < 0) + syswarn(1, errno, "Failed write during copy of %s to %s", + arcn->org_name, arcn->name); + else if (cpcnt != arcn->sb.st_size) + tty_warn(1, "File %s changed size during copy to %s", + arcn->org_name, arcn->name); + else if (fstat(fd1, &sb) < 0) + syswarn(1, errno, "Failed stat of %s", arcn->org_name); + else if (origsb.st_mtime != sb.st_mtime) + tty_warn(1, "File %s was modified during copy to %s", + arcn->org_name, arcn->name); + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (!no_hole && isem && (arcn->sb.st_size > 0L)) + file_flush(fd2, fnm, isem); + return; +} + +/* + * buf_fill() + * fill the read buffer with the next record (or what we can get) from + * the archive volume. + * Return: + * Number of bytes of data in the read buffer, -1 for read error, and + * 0 when finished (user specified termination in ar_next()). + */ + +int +buf_fill(void) +{ + int cnt; + static int fini = 0; + + if (fini) + return 0; + + for(;;) { + /* + * try to fill the buffer. 
on error the next archive volume is + * opened and we try again. + */ + if ((cnt = ar_read(buf, blksz)) > 0) { + bufpt = buf; + bufend = buf + cnt; + rdcnt += cnt; + return cnt; + } + + /* + * errors require resync, EOF goes to next archive + * but in case we have not determined yet the format, + * this means that we have a very short file, so we + * are done again. + */ + if (cnt < 0) + break; + if (frmt == NULL || ar_next() < 0) { + fini = 1; + return 0; + } + rdcnt = 0; + } + exit_val = 1; + return -1; +} + +/* + * buf_flush() + * force the write buffer to the archive. We are passed the number of + * bytes in the buffer at the point of the flush. When we change archives + * the record size might change. (either larger or smaller). + * Return: + * 0 if all is ok, -1 when a write error occurs. + */ + +int +buf_flush(int bufcnt) +{ + int cnt; + int push = 0; + int totcnt = 0; + + /* + * if we have reached the user specified byte count for each archive + * volume, prompt for the next volume. (The non-standard -R flag). + * NOTE: If the wrlimit is smaller than wrcnt, we will always write + * at least one record. We always round limit UP to next blocksize. + */ + if ((wrlimit > 0) && (wrcnt > wrlimit)) { + tty_warn(0, + "User specified archive volume byte limit reached."); + if (ar_next() < 0) { + wrcnt = 0; + exit_val = 1; + return -1; + } + wrcnt = 0; + + /* + * The new archive volume might have changed the size of the + * write blocksize. if so we figure out if we need to write + * (one or more times), or if there is now free space left in + * the buffer (it is no longer full). bufcnt has the number of + * bytes in the buffer, (the blocksize, at the point we were + * CALLED). Push has the amount of "extra" data in the buffer + * if the block size has shrunk from a volume change. 
+ */ + bufend = buf + blksz; + if (blksz > bufcnt) + return 0; + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * We have enough data to write at least one archive block + */ + for (;;) { + /* + * write a block and check if it all went out ok + */ + cnt = ar_write(buf, blksz); + if (cnt == blksz) { + /* + * the write went ok + */ + wrcnt += cnt; + totcnt += cnt; + if (push > 0) { + /* we have extra data to push to the front. + * check for more than 1 block of push, and if + * so we loop back to write again + */ + memcpy(buf, bufend, push); + bufpt = buf + push; + if (push >= blksz) { + push -= blksz; + continue; + } + } else + bufpt = buf; + return totcnt; + } else if (cnt > 0) { + /* + * Oh drat we got a partial write! + * if format doesnt care about alignment let it go, + * we warned the user in ar_write().... but this means + * the last record on this volume violates pax spec.... + */ + totcnt += cnt; + wrcnt += cnt; + bufpt = buf + cnt; + cnt = bufcnt - cnt; + memcpy(buf, bufpt, cnt); + bufpt = buf + cnt; + if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0)) + return totcnt; + break; + } + + /* + * All done, go to next archive + */ + wrcnt = 0; + if (ar_next() < 0) + break; + + /* + * The new archive volume might also have changed the block + * size. if so, figure out if we have too much or too little + * data for using the new block size + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return 0; + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * write failed, stop pax. we must not create a bad archive! + */ + exit_val = 1; + return -1; +} diff --git a/bin/pax/cpio.1 b/bin/pax/cpio.1 new file mode 100644 index 0000000..45f5854 --- /dev/null +++ b/bin/pax/cpio.1 @@ -0,0 +1,307 @@ +.\" $NetBSD: cpio.1,v 1.15 2017/07/03 21:33:23 wiz Exp $ +.\" +.\" Copyright (c) 1997 SigmaSoft, Th. Lockert +.\" All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" OpenBSD: cpio.1,v 1.14 2000/11/10 17:52:02 aaron Exp +.\" +.Dd June 18, 2011 +.Dt CPIO 1 +.Os +.Sh NAME +.Nm cpio +.Nd copy file archives in and out +.Sh SYNOPSIS +.Nm cpio +.Fl o +.Op Fl AaBcLvZz +.Op Fl C Ar bytes +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl O Ar archive +.Ar "< name-list" +.Op Ar "> archive" +.Nm cpio +.Fl i +.Op Fl 6BbcdfmrSstuvZz +.Op Fl C Ar bytes +.Op Fl E Ar file +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl I Ar archive +.Op Ar "pattern ..." +.Op Ar "< archive" +.Nm cpio +.Fl p +.Op Fl adLlmuv +.Ar destination-directory +.Ar "< name-list" +.Sh DESCRIPTION +The +.Nm +command copies files to and from a +.Nm +archive. 
+If the archive is of the form: +.Ar [[user@]host:]file +then the archive will be processed using +.Xr rmt 8 . +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl o , Fl Fl create +Create an archive. +Reads the list of files to store in the +archive from standard input, and writes the archive on standard +output. +.Bl -tag -width Ds +.It Fl a , Fl Fl reset-access-time +Reset the access times on files that have been copied to the +archive. +.It Fl A , Fl Fl append +Append to the specified archive. +.It Fl B +Set block size of output to 5120 bytes. +.It Fl c +Use ASCII format for +.Nm +header for portability. +.It Fl C Ar bytes +Set the block size of output to +.Ar bytes . +.It Fl F Ar archive +.It Fl O Ar archive +Use the specified file name as the archive to write to. +.It Fl H Ar format +Write the archive in the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. +.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl L +Follow symbolic links. +.It Fl v +Be verbose about operations. +List filenames as they are written to the archive. +.It Fl Fl xz +Compress/decompress archive using +.Xr xz 1 +format. +.It Fl Z +Compress archive using +.Xr compress 1 +format. +.It Fl z +Compress/decompress archive using +.Xr gzip 1 +format. +.El +.It Fl i , Fl Fl extract +Restore files from an archive. +Reads the archive file from +standard input and extracts files matching the +.Ar patterns +that were specified on the command line. +.Bl -tag -width Ds +.It Fl b +Do byte and word swapping after reading in data from the +archive, for restoring archives created on systems with +a different byte order. +.It Fl B +Set the block size of the archive being read to 5120 bytes. +.It Fl c +Expect the archive headers to be in ASCII format. 
+.It Fl C Ar bytes +Read archive written with a block size of +.Ar bytes . +.It Fl d , Fl Fl make-directories +Create any intermediate directories as needed during +restore. +.It Fl E Ar file , Fl Fl pattern-file Ar file +Read list of file name patterns to extract or list from +.Ar file . +.It Fl f , Fl Fl nonmatching +Restore all files except those matching the +.Ar patterns +given on the command line. +.It Fl F Ar archive , Fl Fl file Ar archive +.It Fl I Ar archive +Use the specified file as the input for the archive. +.It Fl H Ar format , Fl Fl format Ar format +Read an archive of the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. +.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl m +Restore modification times on files. +.It Fl r , Fl Fl rename +Rename restored files interactively. +.It Fl s +Swap bytes after reading data from the archive. +.It Fl S , Fl Fl swap-halfwords +Swap words after reading data from the archive. +.It Fl t , Fl Fl list +Only list the contents of the archive, no files or +directories will be created. +.It Fl u , Fl Fl unconditional +Overwrite files even when the file in the archive is +older than the one that will be overwritten. +.It Fl v , Fl Fl verbose +Be verbose about operations. +List filenames as they are copied in from the archive. +.It Fl z +Uncompress archive using +.Xr gzip 1 +format. +.It Fl Z +Uncompress archive using +.Xr compress 1 +format. +.It Fl 6 +Process old-style +.Nm +format archives. +.El +.It Fl p , Fl Fl pass-through +Copy files from one location to another in a single pass. +The list of files to copy are read from standard input and +written out to a directory relative to the specified +.Ar directory +argument. +.Bl -tag -width Ds +.It Fl a +Reset the access times on files that have been copied. 
+.It Fl d +Create any intermediate directories as needed to write +the files at the new location. +.It Fl l , Fl Fl link +When possible, link files rather than creating an +extra copy. +.It Fl L , Fl Fl dereference +Follow symbolic links. +.It Fl m , Fl Fl preserve-modification-time +Restore modification times on files. +.It Fl u , Fl Fl unconditional +Overwrite files even when the original file being copied is +older than the one that will be overwritten. +.It Fl v , Fl Fl verbose +Be verbose about operations. +List filenames as they are copied. +.It Fl Fl force-local +Do not interpret filenames that contain a +.Sq \&: +as remote files. +.It Fl Fl insecure +Normally +.Nm +ignores filenames that contain +.Dq .. +as a path component. +With this option, files that contain +.Dq .. +can be processed. +.El +.El +.Sh EXIT STATUS +.Nm +will exit with one of the following values: +.Bl -tag -width 2n +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Pp +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive which may violate the +specific archive format specification. 
+.Sh SEE ALSO +.Xr pax 1 , +.Xr tar 1 +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. +.Sh BUGS +The +.Fl s +and +.Fl S +options are currently not implemented. diff --git a/bin/pax/cpio.c b/bin/pax/cpio.c new file mode 100644 index 0000000..1a38ba2 --- /dev/null +++ b/bin/pax/cpio.c @@ -0,0 +1,1134 @@ +/* $NetBSD: cpio.c,v 1.22 2012/08/09 08:09:21 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)cpio.c 8.1 (Berkeley) 5/31/93"; +#else +__RCSID("$NetBSD: cpio.c,v 1.22 2012/08/09 08:09:21 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "cpio.h" +#include "extern.h" + +static int rd_nm(ARCHD *, int); +static int rd_ln_nm(ARCHD *); +static int com_rd(ARCHD *); + +/* + * Routines which support the different cpio versions + */ + +int cpio_swp_head; /* binary cpio header byte swap */ + +/* + * Routines common to all versions of cpio + */ + +/* + * cpio_strd() + * Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +cpio_strd(void) +{ + return lnk_start(); +} + +/* + * cpio_subtrail() + * Called to determine if a header block is a valid trailer. We are + * passed the block, the in_sync flag (which tells us we are in resync + * mode; looking for a valid header), and cnt (which starts at zero) + * which is used to count the number of empty blocks we have seen so far. 
+ * Return: + * 0 if a valid trailer, -1 if not a valid trailer, + */ + +int +cpio_subtrail(ARCHD *arcn) +{ + /* + * look for trailer id in file we are about to process + */ + if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0)) + return 0; + return -1; +} + +/* + * com_rd() + * operations common to all cpio read functions. + * Return: + * 0 + */ + +static int +com_rd(ARCHD *arcn) +{ + arcn->skip = 0; + arcn->pat = NULL; + arcn->org_name = arcn->name; + switch(arcn->sb.st_mode & C_IFMT) { + case C_ISFIFO: + arcn->type = PAX_FIF; + break; + case C_ISDIR: + arcn->type = PAX_DIR; + break; + case C_ISBLK: + arcn->type = PAX_BLK; + break; + case C_ISCHR: + arcn->type = PAX_CHR; + break; + case C_ISLNK: + arcn->type = PAX_SLK; + break; + case C_ISOCK: + arcn->type = PAX_SCK; + break; + case C_ISCTG: + case C_ISREG: + default: + /* + * we have file data, set up skip (pad is set in the format + * specific sections) + */ + arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG; + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + } + if (chk_lnk(arcn) < 0) + return -1; + return 0; +} + +/* + * cpio_end_wr() + * write the special file with the name trailer in the proper format + * Return: + * result of the write of the trailer from the cpio specific write func + */ + +int +cpio_endwr(void) +{ + ARCHD last; + + /* + * create a trailer request and call the proper format write function + */ + memset(&last, 0, sizeof(last)); + last.nlen = sizeof(TRAILER) - 1; + last.type = PAX_REG; + last.sb.st_nlink = 1; + (void)strcpy(last.name, TRAILER); + return (*frmt->wr)(&last); +} + +/* + * rd_nam() + * read in the file name which follows the cpio header + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_nm(ARCHD *arcn, int nsz) +{ + /* + * do not even try bogus values + */ + if ((nsz <= 0) || (nsz > (int)sizeof(arcn->name))) { + tty_warn(1, "Cpio file name length %d is out of range", nsz); + return -1; + } + + /* + * read the name and make sure it 
is not empty and is \0 terminated + */ + if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') || + (arcn->name[0] == '\0')) { + tty_warn(1, "Cpio file name in header is corrupted"); + return -1; + } + return 0; +} + +/* + * rd_ln_nm() + * read in the link name for a file with links. The link name is stored + * like file data (and is NOT \0 terminated!) + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_ln_nm(ARCHD *arcn) +{ + /* + * check the length specified for bogus values + */ + if ((arcn->sb.st_size == 0) || + (arcn->sb.st_size >= (off_t)sizeof(arcn->ln_name))) { + tty_warn(1, "Cpio link name length is invalid: " OFFT_F, + (OFFT_T) arcn->sb.st_size); + return -1; + } + + /* + * read in the link name and \0 terminate it + */ + if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) != + (int)arcn->sb.st_size) { + tty_warn(1, "Cpio link name read error"); + return -1; + } + arcn->ln_nlen = arcn->sb.st_size; + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * watch out for those empty link names + */ + if (arcn->ln_name[0] == '\0') { + tty_warn(1, "Cpio link name is corrupt"); + return -1; + } + return 0; +} + +/* + * Routines common to the extended byte oriented cpio format + */ + +/* + * cpio_id() + * determine if a block given to us is a valid extended byte oriented + * cpio header + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +cpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_CPIO)) || + (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0)) + return -1; + return 0; +} + +/* + * cpio_rd() + * determine if a buffer is a byte oriented extended cpio archive entry. + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. 
+ */ + +int +cpio_rd(ARCHD *arcn, char *buf) +{ + int nsz; + HD_CPIO *hd; + + /* + * check that this is a valid header, if not return -1 + */ + if (cpio_id(buf, sizeof(HD_CPIO)) < 0) + return -1; + hd = (HD_CPIO *)buf; + + /* + * byte oriented cpio (posix) does not have padding! extract the octal + * ascii fields from the header + */ + arcn->pad = 0L; + arcn->sb.st_dev = (dev_t)asc_u32(hd->c_dev, sizeof(hd->c_dev), OCT); + arcn->sb.st_ino = (ino_t)asc_u32(hd->c_ino, sizeof(hd->c_ino), OCT); + arcn->sb.st_mode = (mode_t)asc_u32(hd->c_mode, sizeof(hd->c_mode), OCT); + arcn->sb.st_uid = (uid_t)asc_u32(hd->c_uid, sizeof(hd->c_uid), OCT); + arcn->sb.st_gid = (gid_t)asc_u32(hd->c_gid, sizeof(hd->c_gid), OCT); + arcn->sb.st_nlink = (nlink_t)asc_u32(hd->c_nlink, sizeof(hd->c_nlink), + OCT); + arcn->sb.st_rdev = (dev_t)asc_u32(hd->c_rdev, sizeof(hd->c_rdev), OCT); + arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->c_mtime, sizeof(hd->c_mtime), + OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + arcn->sb.st_size = (off_t)ASC_OFFT(hd->c_filesize, + sizeof(hd->c_filesize), OCT); + + /* + * check name size and if valid, read in the name of this entry (name + * follows header in the archive) + */ + if ((nsz = (int)asc_u32(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2) + return -1; + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return -1; + + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * no link name to read for this file + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + return com_rd(arcn); + } + + /* + * check link name size and read in the link name. Link names are + * stored like file data. 
+ */ + if (rd_ln_nm(arcn) < 0) + return -1; + + /* + * we have a valid header (with a link) + */ + return com_rd(arcn); +} + +/* + * cpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +cpio_endrd(void) +{ + return (off_t)(sizeof(HD_CPIO) + sizeof(TRAILER)); +} + +/* + * cpio_stwr() + * start up the device mapping table + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +cpio_stwr(void) +{ + return dev_start(); +} + +/* + * cpio_wr() + * copy the data in the ARCHD to buffer in extended byte oriented cpio + * format. + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +cpio_wr(ARCHD *arcn) +{ + HD_CPIO *hd; + int nsz; + char hdblk[sizeof(HD_CPIO)]; + + /* + * check and repair truncated device and inode fields in the header + */ + if (map_dev(arcn, (u_long)CPIO_MASK, (u_long)CPIO_MASK) < 0) + return -1; + + arcn->pad = 0L; + nsz = arcn->nlen + 1; + hd = (HD_CPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * set data size for file data + */ + if (OFFT_ASC(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { + tty_warn(1,"File is too large for cpio format %s", + arcn->org_name); + return 1; + } + break; + case PAX_SLK: + /* + * set data size to hold link name + */ + if (u32_asc((uintmax_t)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) + goto out; + break; + default: + /* + * all other file types have no file data + */ + if (u32_asc((uintmax_t)0, hd->c_filesize, sizeof(hd->c_filesize), + OCT)) + goto out; + break; + } + + /* + * copy the values to the header using octal ascii + */ + if (u32_asc((uintmax_t)MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + 
u32_asc((uintmax_t)arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), + OCT) || + u32_asc((uintmax_t)arcn->sb.st_mtime,hd->c_mtime,sizeof(hd->c_mtime), + OCT) || + u32_asc((uintmax_t)nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT)) + goto out; + + /* + * write the file name to the archive + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0)) { + tty_warn(1, "Unable to write cpio header for %s", + arcn->org_name); + return -1; + } + + /* + * if this file has data, we are done. The caller will write the file + * data, if we are link tell caller we are done, go to next file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return 0; + if (arcn->type != PAX_SLK) + return 1; + + /* + * write the link name to the archive, tell the caller to go to the + * next file as we are done. + */ + if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) { + tty_warn(1,"Unable to write cpio link name for %s", + arcn->org_name); + return -1; + } + return 1; + + out: + /* + * header field is out of range + */ + tty_warn(1, "Cpio header field is too small to store file %s", + arcn->org_name); + return 1; +} + +/* + * Routines common to the system VR4 version of cpio (with/without file CRC) + */ + +/* + * vcpio_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITHOUT crc. 
WATCH it the magic cookies are in OCTAL, the header + * uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +vcpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0)) + return -1; + return 0; +} + +/* + * crc_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +crc_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVCMAGIC, sizeof(AVCMAGIC) - 1) != 0)) + return -1; + return 0; +} + +/* + * crc_strd() + * set file data CRC calculations. Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +crc_strd(void) +{ + docrc = 1; + return lnk_start(); +} + +/* + * vcpio_rd() + * determine if a buffer is a system VR4 archive entry. (with/without CRC) + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +vcpio_rd(ARCHD *arcn, char *buf) +{ + HD_VCPIO *hd; + dev_t devminor; + dev_t devmajor; + int nsz; + + /* + * during the id phase it was determined if we were using CRC, use the + * proper id routine. 
+ */ + if (docrc) { + if (crc_id(buf, sizeof(HD_VCPIO)) < 0) + return -1; + } else { + if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0) + return -1; + } + + hd = (HD_VCPIO *)buf; + arcn->pad = 0L; + + /* + * extract the hex ascii fields from the header + */ + arcn->sb.st_ino = (ino_t)asc_u32(hd->c_ino, sizeof(hd->c_ino), HEX); + arcn->sb.st_mode = (mode_t)asc_u32(hd->c_mode, sizeof(hd->c_mode), HEX); + arcn->sb.st_uid = (uid_t)asc_u32(hd->c_uid, sizeof(hd->c_uid), HEX); + arcn->sb.st_gid = (gid_t)asc_u32(hd->c_gid, sizeof(hd->c_gid), HEX); + arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->c_mtime,sizeof(hd->c_mtime),HEX); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + arcn->sb.st_size = (off_t)ASC_OFFT(hd->c_filesize, + sizeof(hd->c_filesize), HEX); + arcn->sb.st_nlink = (nlink_t)asc_u32(hd->c_nlink, sizeof(hd->c_nlink), + HEX); + devmajor = (dev_t)asc_u32(hd->c_maj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_u32(hd->c_min, sizeof(hd->c_min), HEX); + arcn->sb.st_dev = TODEV(devmajor, devminor); + devmajor = (dev_t)asc_u32(hd->c_rmaj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_u32(hd->c_rmin, sizeof(hd->c_min), HEX); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + arcn->crc = asc_u32(hd->c_chksum, sizeof(hd->c_chksum), HEX); + + /* + * check the length of the file name, if ok read it in, return -1 if + * bogus + */ + if ((nsz = (int)asc_u32(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2) + return -1; + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return -1; + + /* + * skip padding. 
header + filename is aligned to 4 byte boundaries + */ + if (rd_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0) + return -1; + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + return com_rd(arcn); + } + + /* + * read in the link name and skip over the padding + */ + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(VCPIO_PAD(arcn->sb.st_size))) < 0)) + return -1; + + /* + * we have a valid header (with a link) + */ + return com_rd(arcn); +} + +/* + * vcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +vcpio_endrd(void) +{ + return (off_t)(sizeof(HD_VCPIO) + sizeof(TRAILER) + + (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER)))); +} + +/* + * crc_stwr() + * start up the device mapping table, enable crc file calculation + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +crc_stwr(void) +{ + docrc = 1; + return dev_start(); +} + +/* + * vcpio_wr() + * copy the data in the ARCHD to buffer in system VR4 cpio + * (with/without crc) format. 
+ * Return + * 0 if file has data to be written after the header, 1 if file has + * NO data to write after the header, -1 if archive write failed + */ + +int +vcpio_wr(ARCHD *arcn) +{ + HD_VCPIO *hd; + unsigned int nsz; + char hdblk[sizeof(HD_VCPIO)]; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)VCPIO_MASK, (u_long)VCPIO_MASK) < 0) + return -1; + nsz = arcn->nlen + 1; + hd = (HD_VCPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + /* + * add the proper magic value depending whether we were asked for + * file data crc's, and the crc if needed. + */ + if (docrc) { + if (u32_asc((uintmax_t)VCMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + u32_asc((uintmax_t)arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), + HEX)) + goto out; + } else { + if (u32_asc((uintmax_t)VMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + u32_asc((uintmax_t)0, hd->c_chksum, sizeof(hd->c_chksum),HEX)) + goto out; + } + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. 
+ */ + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + if (OFFT_ASC(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { + tty_warn(1,"File is too large for sv4cpio format %s", + arcn->org_name); + return 1; + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + if (u32_asc((uintmax_t)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + if (u32_asc((uintmax_t)0, hd->c_filesize, sizeof(hd->c_filesize), + HEX)) + goto out; + break; + } + + /* + * set the other fields in the header + */ + if (u32_asc((uintmax_t)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + HEX) || + u32_asc((uintmax_t)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + HEX) || + u32_asc((uintmax_t)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + HEX) || + u32_asc((uintmax_t)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + HEX) || + u32_asc((uintmax_t)arcn->sb.st_mtime, hd->c_mtime, sizeof(hd->c_mtime), + HEX) || + u32_asc((uintmax_t)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + HEX) || + u32_asc((uintmax_t)MAJOR(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), + HEX) || + u32_asc((uintmax_t)MINOR(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), + HEX) || + u32_asc((uintmax_t)MAJOR(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), + HEX) || + u32_asc((uintmax_t)MINOR(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), + HEX) || + u32_asc((uintmax_t)nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX)) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) || + (wr_rdbuf(arcn->name, (int)nsz) < 0) || + (wr_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0)) { + tty_warn(1,"Could not write sv4cpio header for %s", + arcn->org_name); + return -1; + } + + /* + * if we have file data, tell the caller we are done, copy the file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return 0; + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return 1; + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(VCPIO_PAD(arcn->ln_nlen))) < 0)) { + tty_warn(1,"Could not write sv4cpio link name for %s", + arcn->org_name); + return -1; + } + return 1; + + out: + /* + * header field is out of range + */ + tty_warn(1,"Sv4cpio header field is too small for file %s", + arcn->org_name); + return 1; +} + +/* + * Routines common to the old binary header cpio + */ + +/* + * bcpio_id() + * determine if a block given to us is a old binary cpio header + * (with/without header byte swapping) + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +bcpio_id(char *blk, int size) +{ + if (size < (int)sizeof(HD_BCPIO)) + return -1; + + /* + * check both normal and byte swapped magic cookies + */ + if (((u_short)SHRT_EXT(blk)) == MAGIC) + return 0; + if (((u_short)RSHRT_EXT(blk)) == MAGIC) { + if (!cpio_swp_head) + ++cpio_swp_head; + return 0; + } + return -1; +} + +/* + * bcpio_rd() + * determine if a buffer is a old binary archive entry. (it may have byte + * swapped header) convert and store the values in the ARCHD parameter. + * This is a very old header format and should not really be used. + * Return: + * 0 if a valid header, -1 otherwise. 
+ */ + +int +bcpio_rd(ARCHD *arcn, char *buf) +{ + HD_BCPIO *hd; + int nsz; + + /* + * check the header + */ + if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0) + return -1; + + arcn->pad = 0L; + hd = (HD_BCPIO *)buf; + if (cpio_swp_head) { + /* + * header has swapped bytes on 16 bit boundaries + */ + arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(RSHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(RSHRT_EXT(hd->h_filesize_2))); + nsz = (int)(RSHRT_EXT(hd->h_namesize)); + } else { + arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(SHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(SHRT_EXT(hd->h_filesize_2))); + nsz = (int)(SHRT_EXT(hd->h_namesize)); + } + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * check the file name size, if bogus give up. 
otherwise read the file + * name + */ + if (nsz < 2) + return -1; + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return -1; + + /* + * header + file name are aligned to 2 byte boundaries, skip if needed + */ + if (rd_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0) + return -1; + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){ + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + return com_rd(arcn); + } + + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(BCPIO_PAD(arcn->sb.st_size))) < 0)) + return -1; + + /* + * we have a valid header (with a link) + */ + return com_rd(arcn); +} + +/* + * bcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +bcpio_endrd(void) +{ + return (off_t)(sizeof(HD_BCPIO) + sizeof(TRAILER) + + (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER)))); +} + +/* + * bcpio_wr() + * copy the data in the ARCHD to buffer in old binary cpio format + * There is a real chance of field overflow with this critter. So we + * always check the conversion is ok. nobody in their right mind + * should write an archive in this format... 
+ * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +bcpio_wr(ARCHD *arcn) +{ + HD_BCPIO *hd; + int nsz; + char hdblk[sizeof(HD_BCPIO)]; + off_t t_offt; + int t_int; + time_t t_timet; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)BCPIO_MASK, (u_long)BCPIO_MASK) < 0) + return -1; + + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + hd = (HD_BCPIO *)hdblk; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size); + hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size); + hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size); + hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size); + t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1)); + t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->sb.st_size != t_offt) { + tty_warn(1,"File is too large for bcpio format %s", + arcn->org_name); + return 1; + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen); + hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen); + hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen); + hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen); + t_int = (int)(SHRT_EXT(hd->h_filesize_1)); + t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->ln_nlen != t_int) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = (char)0; + hd->h_filesize_1[1] = (char)0; + hd->h_filesize_2[0] = (char)0; + hd->h_filesize_2[1] = (char)0; + break; + } + + /* + * build up the rest of the fields + */ + 
hd->h_magic[0] = CHR_WR_2(MAGIC); + hd->h_magic[1] = CHR_WR_3(MAGIC); + hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev); + hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev); + if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev))) + goto out; + hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino); + hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino); + if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino))) + goto out; + hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode); + hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode); + if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode))) + goto out; + hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid); + hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid); + if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid))) + goto out; + hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid); + hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid); + if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid))) + goto out; + hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink); + hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink); + if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink))) + goto out; + hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev); + hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev); + if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev))) + goto out; + hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime); + hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime); + hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime); + hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime); + t_timet = (time_t)(SHRT_EXT(hd->h_mtime_1)); + t_timet = (t_timet << 16) | ((time_t)(SHRT_EXT(hd->h_mtime_2))); + if (arcn->sb.st_mtime != t_timet) + goto out; + nsz = arcn->nlen + 1; + hd->h_namesize[0] = CHR_WR_2(nsz); + hd->h_namesize[1] = CHR_WR_3(nsz); + if (nsz != (int)(SHRT_EXT(hd->h_namesize))) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0) || + (wr_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0)) { + tty_warn(1, "Could not write bcpio header for %s", + arcn->org_name); + return -1; + } + + /* + * if we have file data, tell the caller we are done + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return 0; + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return 1; + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(BCPIO_PAD(arcn->ln_nlen))) < 0)) { + tty_warn(1,"Could not write bcpio link name for %s", + arcn->org_name); + return -1; + } + return 1; + + out: + /* + * header field is out of range + */ + tty_warn(1,"Bcpio header field is too small for file %s", + arcn->org_name); + return 1; +} diff --git a/bin/pax/cpio.h b/bin/pax/cpio.h new file mode 100644 index 0000000..bbf40ed --- /dev/null +++ b/bin/pax/cpio.h @@ -0,0 +1,149 @@ +/* $NetBSD: cpio.h,v 1.6 2003/10/13 07:41:22 agc Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cpio.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Defines common to all versions of cpio + */ +#define TRAILER "TRAILER!!!" 
/* name in last archive record */ + +/* + * Header encoding of the different file types + */ +#define C_ISDIR 040000 /* Directory */ +#define C_ISFIFO 010000 /* FIFO */ +#define C_ISREG 0100000 /* Regular file */ +#define C_ISBLK 060000 /* Block special file */ +#define C_ISCHR 020000 /* Character special file */ +#define C_ISCTG 0110000 /* Reserved for contiguous files */ +#define C_ISLNK 0120000 /* Reserved for symbolic links */ +#define C_ISOCK 0140000 /* Reserved for sockets */ +#define C_IFMT 0170000 /* type of file */ + +/* + * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990 + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_dev[6]; /* device number */ + char c_ino[6]; /* inode number */ + char c_mode[6]; /* file type/access */ + char c_uid[6]; /* owners uid */ + char c_gid[6]; /* owners gid */ + char c_nlink[6]; /* # of links at archive creation */ + char c_rdev[6]; /* block/char major/minor # */ + char c_mtime[11]; /* modification time */ + char c_namesize[6]; /* length of pathname */ + char c_filesize[11]; /* length of file in bytes */ +} HD_CPIO; + +#define MAGIC 070707 /* transportable archive id */ + +#ifdef _PAX_ +#define AMAGIC "070707" /* ascii equivalent string of MAGIC */ +#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */ + /* used for dev/inode remaps */ +#endif /* _PAX_ */ + +/* + * Binary cpio header structure + * + * CAUTION! CAUTION! CAUTION! + * Each field really represents a 16 bit short (NOT ASCII). Described as + * an array of chars in an attempt to improve portability!! 
+ */ +typedef struct { + u_char h_magic[2]; + u_char h_dev[2]; + u_char h_ino[2]; + u_char h_mode[2]; + u_char h_uid[2]; + u_char h_gid[2]; + u_char h_nlink[2]; + u_char h_rdev[2]; + u_char h_mtime_1[2]; + u_char h_mtime_2[2]; + u_char h_namesize[2]; + u_char h_filesize_1[2]; + u_char h_filesize_2[2]; +} HD_BCPIO; + +#ifdef _PAX_ +/* + * extraction and creation macros for binary cpio + */ +#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff)) +#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff)) +#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff)) +#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff)) +#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff)) +#define CHR_WR_3(val) ((char)((val) & 0xff)) + +/* + * binary cpio masks and pads + */ +#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */ +#define BCPIO_MASK 0xffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ + +/* + * System VR4 cpio header structure (with/without file data crc) + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_ino[8]; /* inode number */ + char c_mode[8]; /* file type/access */ + char c_uid[8]; /* owners uid */ + char c_gid[8]; /* owners gid */ + char c_nlink[8]; /* # of links at archive creation */ + char c_mtime[8]; /* modification time */ + char c_filesize[8]; /* length of file in bytes */ + char c_maj[8]; /* block/char major # */ + char c_min[8]; /* block/char minor # */ + char c_rmaj[8]; /* special file major # */ + char c_rmin[8]; /* special file minor # */ + char c_namesize[8]; /* length of pathname */ + char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */ +} HD_VCPIO; + +#define VMAGIC 070701 /* sVr4 new portable archive id */ +#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */ +#ifdef _PAX_ +#define AVMAGIC "070701" /* ascii string of above */ +#define AVCMAGIC "070702" /* ascii string of above */ +#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */ +#define 
VCPIO_MASK 0xffffffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ diff --git a/bin/pax/dumptar.c b/bin/pax/dumptar.c new file mode 100644 index 0000000..5538702 --- /dev/null +++ b/bin/pax/dumptar.c @@ -0,0 +1,131 @@ +/* $NetBSD: dumptar.c,v 1.3 2016/05/30 17:34:35 dholland Exp $ */ + +/*- + * Copyright (c) 2004 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tar.h" + +#define ussum(a) 1 + +/* + * Ensure null termination. 
+ */ +static char * +buf(const char *p, size_t s) +{ + static char buf[1024]; + + assert(s < sizeof(buf)); + memcpy(buf, p, s); + buf[s] = '\0'; + return buf; +} + +static int +intarg(const char *p, size_t s) +{ + char *ep, *b = buf(p, s); + int r = (int)strtol(b, &ep, 8); + return r; +} + +static int +usdump(void *p) +{ + HD_USTAR *t = p; + int size = intarg(t->size, sizeof(t->size)); + size = ((size + 511) / 512) * 512 + 512; + + (void)fprintf(stdout, "*****\n"); +#define PR(a) \ + (void)fprintf(stdout, #a "=%s\n", buf(t->a, sizeof(t->a))); +#define IPR(a) \ + (void)fprintf(stdout, #a "=%d\n", intarg(t->a, sizeof(t->a))); +#define OPR(a) \ + (void)fprintf(stdout, #a "=%o\n", intarg(t->a, sizeof(t->a))); + PR(name); + OPR(mode); + IPR(uid); + IPR(gid); + IPR(size); + OPR(mtime); + OPR(chksum); + (void)fprintf(stdout, "typeflag=%c\n", t->typeflag); + PR(linkname); + PR(magic); + PR(version); + PR(uname); + PR(gname); + OPR(devmajor); + OPR(devminor); + PR(prefix); + return size; +} + +int +main(int argc, char *argv[]) +{ + int fd; + struct stat st; + char *p, *ep; + + if (argc != 2) { + (void)fprintf(stderr, "Usage: %s \n", getprogname()); + return 1; + } + + if ((fd = open(argv[1], O_RDONLY)) == -1) + err(1, "Cannot open `%s'", argv[1]); + + if (fstat(fd, &st) == -1) + err(1, "Cannot fstat `%s'", argv[1]); + + if ((p = mmap(NULL, (size_t)st.st_size, PROT_READ, + MAP_FILE|MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED) + err(1, "Cannot mmap `%s'", argv[1]); + (void)close(fd); + + ep = (char *)p + (size_t)st.st_size; + + for (; p < ep + sizeof(HD_USTAR);) { + if (ussum(p)) + p += usdump(p); + } + return 0; +} diff --git a/bin/pax/extern.h b/bin/pax/extern.h new file mode 100644 index 0000000..298600c --- /dev/null +++ b/bin/pax/extern.h @@ -0,0 +1,326 @@ +/* $NetBSD: extern.h,v 1.59 2012/08/09 08:09:21 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)extern.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * External references from each source file + */ + +#include +#include + +/* + * ar_io.c + */ +extern const char *arcname; +extern int curdirfd; +extern const char *gzip_program; +extern time_t starttime; +extern int force_one_volume; +extern char *chdname; +extern int forcelocal; +extern int secure; + +int ar_open(const char *); +void ar_close(void); +void ar_drain(void); +int ar_set_wr(void); +int ar_app_ok(void); +#ifdef SYS_NO_RESTART +int read_with_restart(int, void *, int); +int write_with_restart(int, void *, int); +#else +#define read_with_restart read +#define write_with_restart write +#endif +int xread(int, void *, int); +int xwrite(int, void *, int); +int ar_read(char *, int); +int ar_write(char *, int); +int ar_rdsync(void); +int ar_fow(off_t, off_t *); +int ar_rev(off_t ); +int ar_next(void); +void ar_summary(int); +int ar_dochdir(const char *); + +/* + * ar_subs.c + */ +extern u_long flcnt; +extern ARCHD archd; +int updatepath(void); +int dochdir(const char *); +int fdochdir(int); +int domkdir(const char *, mode_t); +int list(void); +int extract(void); +int append(void); +int archive(void); +int copy(void); + +/* + * buf_subs.c + */ +extern int blksz; +extern int wrblksz; +extern int maxflt; +extern int rdblksz; +extern off_t wrlimit; +extern off_t rdcnt; +extern off_t wrcnt; +int wr_start(void); +int rd_start(void); +void cp_start(void); +int appnd_start(off_t); +int rd_sync(void); +void pback(char *, int); +int rd_skip(off_t); +void wr_fin(void); +int wr_rdbuf(char *, int); +int rd_wrbuf(char *, int); +int wr_skip(off_t); +int wr_rdfile(ARCHD *, int, off_t *); +int rd_wrfile(ARCHD *, int, off_t *); +void cp_file(ARCHD *, int, int); +int buf_fill(void); +int buf_flush(int); + +/* + * cpio.c + */ +extern int cpio_swp_head; +int cpio_strd(void); +int cpio_subtrail(ARCHD *); +int cpio_endwr(void); +int cpio_id(char *, int); +int cpio_rd(ARCHD *, char *); +off_t cpio_endrd(void); +int 
cpio_stwr(void); +int cpio_wr(ARCHD *); +int vcpio_id(char *, int); +int crc_id(char *, int); +int crc_strd(void); +int vcpio_rd(ARCHD *, char *); +off_t vcpio_endrd(void); +int crc_stwr(void); +int vcpio_wr(ARCHD *); +int bcpio_id(char *, int); +int bcpio_rd(ARCHD *, char *); +off_t bcpio_endrd(void); +int bcpio_wr(ARCHD *); + +/* + * file_subs.c + */ +extern char *gnu_name_string, *gnu_link_string; +extern size_t gnu_name_length, gnu_link_length; +extern char *xtmp_name; +int file_creat(ARCHD *, int); +void file_close(ARCHD *, int); +int lnk_creat(ARCHD *, int *); +int cross_lnk(ARCHD *); +int chk_same(ARCHD *); +int node_creat(ARCHD *); +int unlnk_exist(char *, int); +int chk_path(char *, uid_t, gid_t); +void set_ftime(char *fnm, time_t mtime, time_t atime, int frc, int slk); +int set_ids(char *, uid_t, gid_t); +void set_pmode(char *, mode_t); +void set_chflags(char *fnm, u_int32_t flags); +int file_write(int, char *, int, int *, int *, int, char *); +void file_flush(int, char *, int); +void rdfile_close(ARCHD *, int *); +int set_crc(ARCHD *, int); + +/* + * ftree.c + */ +int ftree_start(void); +int ftree_add(char *, int); +void ftree_sel(ARCHD *); +void ftree_chk(void); +int next_file(ARCHD *); + +/* + * gen_subs.c + */ +void ls_list(ARCHD *, time_t, FILE *); +void ls_tty(ARCHD *); +void safe_print(const char *, FILE *); +uint32_t asc_u32(char *, int, int); +int u32_asc(uintmax_t, char *, int, int); +uintmax_t asc_umax(char *, int, int); +int umax_asc(uintmax_t, char *, int, int); +int check_Aflag(void); + +/* + * getoldopt.c + */ +struct option; +int getoldopt(int, char **, const char *, struct option *, int *); + +/* + * options.c + */ +extern FSUB fsub[]; +extern int ford[]; +extern int sep; +extern int havechd; +void options(int, char **); +OPLIST * opt_next(void); +int bad_opt(void); +int mkpath(char *); +char *chdname; +#if !HAVE_NBTOOL_CONFIG_H +int do_chroot; +#endif + +/* + * pat_rep.c + */ +int rep_add(char *); +int pat_add(char *, char *, int); +void 
pat_chk(void); +int pat_sel(ARCHD *); +int pat_match(ARCHD *); +int mod_name(ARCHD *, int); +int set_dest(ARCHD *, char *, int); + +/* + * pax.c + */ +extern int act; +extern FSUB *frmt; +extern int Aflag; +extern int cflag; +extern int cwdfd; +extern int dflag; +extern int iflag; +extern int kflag; +extern int lflag; +extern int nflag; +extern int tflag; +extern int uflag; +extern int vflag; +extern int Dflag; +extern int Hflag; +extern int Lflag; +extern int Mflag; +extern int Vflag; +extern int Xflag; +extern int Yflag; +extern int Zflag; +extern int vfpart; +extern int patime; +extern int pmtime; +extern int nodirs; +extern int pfflags; +extern int pmode; +extern int pids; +extern int rmleadslash; +extern int exit_val; +extern int docrc; +extern int to_stdout; +extern char *dirptr; +extern char *ltmfrmt; +extern const char *argv0; +extern FILE *listf; +extern char *tempfile; +extern char *tempbase; + +/* + * sel_subs.c + */ +int sel_chk(ARCHD *); +int grp_add(char *); +int usr_add(char *); +int trng_add(char *); + +/* + * tables.c + */ +int lnk_start(void); +int chk_lnk(ARCHD *); +void purg_lnk(ARCHD *); +void lnk_end(void); +int ftime_start(void); +int chk_ftime(ARCHD *); +int name_start(void); +int add_name(char *, int, char *); +void sub_name(char *, int *, size_t); +int dev_start(void); +int add_dev(ARCHD *); +int map_dev(ARCHD *, u_long, u_long); +int atdir_start(void); +void atdir_end(void); +void add_atdir(char *, dev_t, ino_t, time_t, time_t); +int get_atdir(dev_t, ino_t, time_t *, time_t *); +int dir_start(void); +void add_dir(char *, int, struct stat *, int); +void proc_dir(void); +u_int st_hash(char *, int, int); + +/* + * tar.c + */ +extern int is_gnutar; +int tar_endwr(void); +off_t tar_endrd(void); +int tar_trail(char *, int, int *); +int tar_id(char *, int); +int tar_opt(void); +int tar_rd(ARCHD *, char *); +int tar_wr(ARCHD *); +int ustar_strd(void); +int ustar_stwr(void); +int ustar_id(char *, int); +int ustar_rd(ARCHD *, char *); +int 
ustar_wr(ARCHD *); +int tar_gnutar_X_compat(const char *); +int tar_gnutar_minus_minus_exclude(const char *); + +/* + * tty_subs.c + */ +int tty_init(void); +void tty_prnt(const char *, ...) + __attribute__((format (printf, 1, 2))); +int tty_read(char *, int); +void tty_warn(int, const char *, ...) + __attribute__((format (printf, 2, 3))); +void syswarn(int, int, const char *, ...) + __attribute__((format (printf, 3, 4))); diff --git a/bin/pax/file_subs.c b/bin/pax/file_subs.c new file mode 100644 index 0000000..cd421d0 --- /dev/null +++ b/bin/pax/file_subs.c @@ -0,0 +1,1156 @@ +/* $NetBSD: file_subs.c,v 1.63 2013/07/29 17:46:36 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)file_subs.c 8.1 (Berkeley) 5/31/93"; +#else +__RCSID("$NetBSD: file_subs.c,v 1.63 2013/07/29 17:46:36 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +#include "options.h" + +char *xtmp_name; + +static int +mk_link(char *,struct stat *,char *, int); + +static int warn_broken; + +/* + * routines that deal with file operations such as: creating, removing; + * and setting access modes, uid/gid and times of files + */ +#define SET_BITS (S_ISUID | S_ISGID) +#define FILE_BITS (S_IRWXU | S_IRWXG | S_IRWXO) +#define A_BITS (FILE_BITS | SET_BITS | S_ISVTX) + +/* + * The S_ISVTX (sticky bit) can be set by non-superuser on directories + * but not other kinds of files. + */ +#define FILEBITS(dir) ((dir) ? (FILE_BITS | S_ISVTX) : FILE_BITS) +#define SETBITS(dir) ((dir) ? SET_BITS : (SET_BITS | S_ISVTX)) + +static mode_t +apply_umask(mode_t mode) +{ + static mode_t cached_umask; + static int cached_umask_valid; + + if (!cached_umask_valid) { + cached_umask = umask(0); + umask(cached_umask); + cached_umask_valid = 1; + } + + return mode & ~cached_umask; +} + +/* + * file_creat() + * Create and open a file. 
+ * Return: + * file descriptor or -1 for failure + */ + +int +file_creat(ARCHD *arcn, int write_to_hardlink) +{ + int fd = -1; + int oerrno; + + /* + * Some badly broken tar implementations emit directory entries + * whose names end in a /, but mark them as plain files. Do not + * create a temporary file for such an entry: warn about it (the + * first time only, tracked by warn_broken) and return -1. + */ + if (arcn->nlen != 0 && arcn->name[arcn->nlen - 1] == '/') { + if (!warn_broken) { + tty_warn(0, "Archive was created with a broken tar;" + " file `%s' is a directory, but marked as plain.", + arcn->name); + warn_broken = 1; + } + return -1; + } + + /* + * In "cpio" archives it's usually the last record of a set of + * hardlinks which includes the contents of the file. We cannot + * use a temporary file in that case because we couldn't link it + * with the existing other hardlinks after restoring the contents + * to it. And it's also useless to create the hardlink under a + * temporary name because the other hardlinks would have partial + * contents while restoring. + * NOTE(review): O_EXCL is passed without O_CREAT below; POSIX leaves + * the result of that combination undefined, confirm it is intended. + */ + if (write_to_hardlink) + return (open(arcn->name, O_TRUNC | O_EXCL | O_RDWR, 0)); + + /* + * Create a temporary file name so that the file doesn't have partial + * contents while restoring. The buffer holds the name, the 7 byte + * ".XXXXXX" mkstemp() suffix and the terminating NUL (nlen + 8). + */ + arcn->tmp_name = malloc(arcn->nlen + 8); + if (arcn->tmp_name == NULL) { + syswarn(1, errno, "Cannot malloc %d bytes", arcn->nlen + 8); + return -1; + } + if (xtmp_name != NULL) + abort(); + xtmp_name = arcn->tmp_name; + + for (;;) { + /* + * try to create the temporary file we use to restore the + * contents info. if this fails, keep checking all the nodes + * in the path until chk_path() finds that it cannot fix + * anything further. if that happens we just give up.
+ */ + (void)snprintf(arcn->tmp_name, arcn->nlen + 8, "%s.XXXXXX", + arcn->name); + fd = mkstemp(arcn->tmp_name); + if (fd >= 0) + break; + oerrno = errno; + if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { + (void)fflush(listf); + syswarn(1, oerrno, "Cannot create %s", arcn->tmp_name); + xtmp_name = NULL; + free(arcn->tmp_name); + arcn->tmp_name = NULL; + return -1; + } + } + return fd; +} + +/* + * file_close() + * Close file descriptor to a file just created by pax. Sets modes, + * ownership and times as required and, when a temporary file was used + * (arcn->tmp_name is not NULL), renames it to arcn->name. Does nothing + * when fd is negative. + * Return: + * nothing (void); failures are only reported through syswarn() + */ + +void +file_close(ARCHD *arcn, int fd) +{ + char *tmp_name; + int res; + + if (fd < 0) + return; + + tmp_name = (arcn->tmp_name != NULL) ? arcn->tmp_name : arcn->name; + + if (close(fd) < 0) + syswarn(0, errno, "Cannot close file descriptor on %s", + tmp_name); + + /* + * set owner/groups first as this may strip off mode bits we want + * then set file permission modes. Then set file access and + * modification times. + */ + if (pids) + res = set_ids(tmp_name, arcn->sb.st_uid, arcn->sb.st_gid); + else + res = 0; + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT + * set uid/gid bits but restore the file modes (since mkstemp doesn't). + */ + if (!pmode || res) + arcn->sb.st_mode &= ~SETBITS(0); + if (pmode) + set_pmode(tmp_name, arcn->sb.st_mode); + else + set_pmode(tmp_name, + apply_umask((arcn->sb.st_mode & FILEBITS(0)))); + if (patime || pmtime) + set_ftime(tmp_name, arcn->sb.st_mtime, + arcn->sb.st_atime, 0, 0); + + /* Did we write directly to the target file? */ + if (arcn->tmp_name == NULL) + return; + + /* + * Finally, now the temp file is fully instantiated rename it to + * the desired file name.
+ */ + if (rename(tmp_name, arcn->name) < 0) { + syswarn(0, errno, "Cannot rename %s to %s", + tmp_name, arcn->name); + (void)unlink(tmp_name); + } + +#if HAVE_STRUCT_STAT_ST_FLAGS + if (pfflags && arcn->type != PAX_SLK) + set_chflags(arcn->name, arcn->sb.st_flags); +#endif + + free(arcn->tmp_name); + arcn->tmp_name = NULL; + xtmp_name = NULL; +} + +/* + * lnk_creat() + * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name + * must exist; + * *payload is set to non-zero when arcn is a regular file with a + * non-zero size that does not exceed arcn->skip, zero otherwise. + * Return: + * 0 if ok, -1 otherwise + */ + +int +lnk_creat(ARCHD *arcn, int *payload) +{ + struct stat sb; + + /* + * Check if this hardlink carries the "payload". In "cpio" archives + * it's usually the last record of a set of hardlinks which includes + * the contents of the file. + * + */ + *payload = S_ISREG(arcn->sb.st_mode) && + (arcn->sb.st_size > 0) && (arcn->sb.st_size <= arcn->skip); + + /* + * We may be running as root, so we have to be sure that link target + * is not a directory, so we lstat and check. XXX: This is still racy. + * NOTE(review): when lstat() fails sb is left unset but is still + * handed to mk_link(), which reads fields from it. + */ + if (lstat(arcn->ln_name, &sb) != -1 && S_ISDIR(sb.st_mode)) { + tty_warn(1, "A hard link to the directory %s is not allowed", + arcn->ln_name); + return -1; + } + + return mk_link(arcn->ln_name, &sb, arcn->name, 0); +} + +/* + * cross_lnk() + * Create a hard link to arcn->org_name from arcn->name. Only used in copy + * with the -l flag. No warning or error if this does not succeed (we will + * then just create the file) + * Return: + * 1 if copy() should try to create this file node + * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). + */ + +int +cross_lnk(ARCHD *arcn) +{ + /* + * try to make a link to original file (-l flag in copy mode). make + * sure we do not try to link to directories in case we are running as + * root (and it might succeed).
+ */ + if (arcn->type == PAX_DIR) + return 1; + return mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1); +} + +/* + * chk_same() + * In copy mode if we are not trying to make hard links between the src + * and destinations, make sure we are not going to overwrite ourselves by + * accident. This slows things down a little, but we have to protect all + * those people who make typing errors. + * Return: + * 1 go ahead and copy: the target does not exist, or it exists (no -k) + * and is not the source file + * 0 skip it: the target exists and -k is set, or it is the same file + * as the source (same device and inode numbers) + */ + +int +chk_same(ARCHD *arcn) +{ + struct stat sb; + + /* + * if file does not exist, return. if file exists and -k, skip it + * quietly + */ + if (lstat(arcn->name, &sb) < 0) + return 1; + if (kflag) + return 0; + + /* + * better make sure the user does not have src == dest by mistake + */ + if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { + tty_warn(1, "Unable to copy %s, file would overwrite itself", + arcn->name); + return 0; + } + return 1; +} + +/* + * mk_link() + * try to make a hard link between two files. if ign set, we do not + * complain. + * Return: + * 0 if successful (or we are done with this file but no error, such as + * finding the from file exists and the user has set -k). + * 1 when ign is set, indicating that we could not make the link but we + * should try to copy/extract the file as that might work (and is an + * allowed option). + * -1 an error occurred. + */ + +static int +mk_link(char *to, struct stat *to_sb, char *from, int ign) +{ + struct stat sb; + int oerrno; + + /* + * if from file exists, it has to be unlinked to make the link.
If the + * file exists and -k is set, skip it quietly + */ + if (lstat(from, &sb) == 0) { + if (kflag) + return 0; + + /* + * make sure it is not the same file, protect the user + */ + if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { + tty_warn(1, "Cannot link file %s to itself", to); + return -1; + } + + /* + * try to get rid of the file, based on the type + */ + if (S_ISDIR(sb.st_mode) && strcmp(from, ".") != 0) { + if (rmdir(from) < 0) { + syswarn(1, errno, "Cannot remove %s", from); + return -1; + } + } else if (unlink(from) < 0) { + if (!ign) { + syswarn(1, errno, "Cannot remove %s", from); + return -1; + } + return 1; + } + } + + /* + * from file is gone (or did not exist), try to make the hard link. + * if it fails, check the path and try it again (if chk_path() says to + * try again) + */ + for (;;) { + if (link(to, from) == 0) + break; + oerrno = errno; + if (chk_path(from, to_sb->st_uid, to_sb->st_gid) == 0) + continue; + if (!ign) { + syswarn(1, oerrno, "Cannot link to %s from %s", to, + from); + return -1; + } + return 1; + } + + /* + * all right the link was made + */ + return 0; +} + +/* + * node_creat() + * create an entry in the file system (other than a file or hard link). + * If successful, sets uid/gid modes and times as required. + * Return: + * 0 if ok, -1 otherwise + */ + +int +node_creat(ARCHD *arcn) +{ + int res; + int ign = 0; + int oerrno; + int pass = 0; + mode_t file_mode; + struct stat sb; + char target[MAXPATHLEN]; + char *nm = arcn->name; + int len; + + /* + * create node based on type, if that fails try to unlink the node and + * try again. finally check the path and try again. As noted in the + * file and link creation routines, this method seems to exhibit the + * best performance in general use workloads. 
+ */ + file_mode = arcn->sb.st_mode & FILEBITS(arcn->type == PAX_DIR); + + for (;;) { + switch (arcn->type) { + case PAX_DIR: + /* + * If -h (or -L) was given in tar-mode, follow the + * potential symlink chain before trying to create the + * directory. + */ + if (strcmp(NM_TAR, argv0) == 0 && Lflag) { + while (lstat(nm, &sb) == 0 && + S_ISLNK(sb.st_mode)) { + len = readlink(nm, target, + sizeof target - 1); + if (len == -1) { + syswarn(0, errno, + "cannot follow symlink %s " + "in chain for %s", + nm, arcn->name); + res = -1; + goto badlink; + } + target[len] = '\0'; + nm = target; + } + } + res = domkdir(nm, file_mode); +badlink: + if (ign) + res = 0; + break; + case PAX_CHR: + file_mode |= S_IFCHR; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_BLK: + file_mode |= S_IFBLK; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_FIF: + res = mkfifo(nm, file_mode); + break; + case PAX_SCK: + /* + * Skip sockets, operation has no meaning under BSD + */ + tty_warn(0, + "%s skipped. Sockets cannot be copied or extracted", + nm); + return (-1); + case PAX_SLK: + res = symlink(arcn->ln_name, nm); + break; + case PAX_CTG: + case PAX_HLK: + case PAX_HRG: + case PAX_REG: + default: + /* + * we should never get here + */ + tty_warn(0, "%s has an unknown file type, skipping", + nm); + return (-1); + } + + /* + * if we were able to create the node break out of the loop, + * otherwise try to unlink the node and try again. if that + * fails check the full path and try a final time. 
+ */ + if (res == 0) + break; + + /* + * we failed to make the node + */ + oerrno = errno; + switch (pass++) { + case 0: + if ((ign = unlnk_exist(nm, arcn->type)) < 0) + return (-1); + continue; + + case 1: + if (nodirs || + chk_path(nm, arcn->sb.st_uid, + arcn->sb.st_gid) < 0) { + syswarn(1, oerrno, "Cannot create %s", nm); + return (-1); + } + continue; + } + + /* + * it must be a file that exists but we can't create or + * remove, but we must avoid the infinite loop. + */ + break; + } + + /* + * we were able to create the node. set uid/gid, modes and times + */ + if (pids) + res = set_ids(nm, arcn->sb.st_uid, arcn->sb.st_gid); + else + res = 0; + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT any + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~SETBITS(arcn->type == PAX_DIR); + if (pmode) + set_pmode(arcn->name, arcn->sb.st_mode); + + if (arcn->type == PAX_DIR && strcmp(NM_CPIO, argv0) != 0) { + /* + * Dirs must be processed again at end of extract to set times + * and modes to agree with those stored in the archive. However + * to allow extract to continue, we may have to also set owner + * rights. This allows nodes in the archive that are children + * of this directory to be extracted without failure. Both time + * and modes will be fixed after the entire archive is read and + * before pax exits. + */ + if (access(nm, R_OK | W_OK | X_OK) < 0) { + if (lstat(nm, &sb) < 0) { + syswarn(0, errno,"Cannot access %s (stat)", + arcn->name); + set_pmode(nm,file_mode | S_IRWXU); + } else { + /* + * We have to add rights to the dir, so we make + * sure to restore the mode. The mode must be + * restored AS CREATED and not as stored if + * pmode is not set. 
+ */ + set_pmode(nm, ((sb.st_mode & + FILEBITS(arcn->type == PAX_DIR)) | + S_IRWXU)); + if (!pmode) + arcn->sb.st_mode = sb.st_mode; + } + + /* + * we have to force the mode to what was set here, + * since we changed it from the default as created. + */ + add_dir(nm, arcn->nlen, &(arcn->sb), 1); + } else if (pmode || patime || pmtime) + add_dir(nm, arcn->nlen, &(arcn->sb), 0); + } + + if (patime || pmtime) + set_ftime(arcn->name, arcn->sb.st_mtime, + arcn->sb.st_atime, 0, (arcn->type == PAX_SLK) ? 1 : 0); + +#if HAVE_STRUCT_STAT_ST_FLAGS + if (pfflags && arcn->type != PAX_SLK) + set_chflags(arcn->name, arcn->sb.st_flags); +#endif + return 0; +} + +/* + * unlnk_exist() + * Remove node from file system with the specified name. We pass the type + * of the node that is going to replace it. When we try to create a + * directory and find that it already exists, we allow processing to + * continue as proper modes etc will always be set for it later on. + * Return: + * 0 is ok to proceed, no file with the specified name exists + * -1 we were unable to remove the node, or we should not remove it (-k) + * 1 we found a directory and we were going to create a directory. + */ + +int +unlnk_exist(char *name, int type) +{ + struct stat sb; + + /* + * the file does not exist, or -k we are done + */ + if (lstat(name, &sb) < 0) + return 0; + if (kflag) + return -1; + + if (S_ISDIR(sb.st_mode)) { + /* + * try to remove a directory, if it fails and we were going to + * create a directory anyway, tell the caller (return a 1). + * + * don't try to remove the directory if the name is "." + * otherwise later file/directory creation fails. 
+ */ + if (strcmp(name, ".") == 0) + return 1; + if (rmdir(name) < 0) { + if (type == PAX_DIR) + return 1; + syswarn(1, errno, "Cannot remove directory %s", name); + return -1; + } + return 0; + } + + /* + * try to get rid of all non-directory type nodes + */ + if (unlink(name) < 0) { + (void)fflush(listf); + syswarn(1, errno, "Cannot unlink %s", name); + return -1; + } + return 0; +} + +/* + * chk_path() + * We were trying to create some kind of node in the file system and it + * failed. chk_path() makes sure the path up to the node exists and is + * writable. When we have to create a directory that is missing along the + * path somewhere, the directory we create will be set to the same + * uid/gid as the file has (when uid and gid are being preserved). + * NOTE: this routine is a real performance loss. It is only used as a + * last resort when trying to create entries in the file system. + * Return: + * -1 when it could find nothing it is allowed to fix. + * 0 otherwise + */ + +int +chk_path(char *name, uid_t st_uid, gid_t st_gid) +{ + char *spt = name; + struct stat sb; + int retval = -1; + + /* + * watch out for paths with nodes stored directly in / (e.g. /bozo) + */ + if (*spt == '/') + ++spt; + + for(;;) { + /* + * work forward from the first / and check each part of + * the path + */ + spt = strchr(spt, '/'); + if (spt == NULL) + break; + *spt = '\0'; + + /* + * if it exists we assume it is a directory, it is not within + * the spec (at least it seems to read that way) to alter the + * file system for nodes NOT EXPLICITLY stored on the archive. + * If that assumption is changed, you would test the node here + * and figure out how to get rid of it (probably like some + * recursive unlink()) or fix up the directory permissions if + * required (do an access()). 
+ */ + if (lstat(name, &sb) == 0) { + *(spt++) = '/'; + continue; + } + + /* + * the path fails at this point, see if we can create the + * needed directory and continue on + */ + if (domkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) == -1) { + *spt = '/'; + retval = -1; + break; + } + + /* + * we were able to create the directory. We will tell the + * caller that we found something to fix, and it is ok to try + * and create the node again. + */ + retval = 0; + if (pids) + (void)set_ids(name, st_uid, st_gid); + + /* + * make sure the user doesn't have some strange umask that + * causes this newly created directory to be unusable. We fix + * the modes and restore them back to the creation default at + * the end of pax + */ + if ((access(name, R_OK | W_OK | X_OK) < 0) && + (lstat(name, &sb) == 0)) { + set_pmode(name, ((sb.st_mode & FILEBITS(0)) | + S_IRWXU)); + add_dir(name, spt - name, &sb, 1); + } + *(spt++) = '/'; + continue; + } + /* + * We perform one final check here, because if someone else + * created the directory in parallel with us, we might return + * the wrong error code, even if the directory exists now. + */ + if (retval == -1 && stat(name, &sb) == 0 && S_ISDIR(sb.st_mode)) + retval = 0; + return retval; +} + +/* + * set_ftime() + * Set the access time and modification time for a named file. If frc + * is non-zero we force these times to be set even if the user did not + * request access and/or modification time preservation (this is also + * used by -t to reset access times). + * When frc is zero, only those times the user has asked for are set, the + * other ones are left alone. We do not assume the un-documented feature + * of many utimes() implementations that consider a 0 time value as a do + * not set request. + * + * Unfortunately, there are systems where lutimes() is present but does + * not work on some filesystem types, which cannot be detected at + * compile time.
This requires passing down symlink knowledge into + * this function to obtain correct operation. Linux with XFS is one + * example of such a system. + */ + +void +set_ftime(char *fnm, time_t mtime, time_t atime, int frc, int slk) +{ + struct timeval tv[2]; + struct stat sb; + + tv[0].tv_sec = atime; + tv[0].tv_usec = 0; + tv[1].tv_sec = mtime; + tv[1].tv_usec = 0; + if (!frc && (!patime || !pmtime)) { + /* + * if we are not forcing, only set those times the user wants + * set. We get the current values of the times if we need them. + */ + if (lstat(fnm, &sb) == 0) { +#if BSD4_4 && !HAVE_NBTOOL_CONFIG_H + if (!patime) + TIMESPEC_TO_TIMEVAL(&tv[0], &sb.st_atimespec); + if (!pmtime) + TIMESPEC_TO_TIMEVAL(&tv[1], &sb.st_mtimespec); +#else + if (!patime) + tv[0].tv_sec = sb.st_atime; + if (!pmtime) + tv[1].tv_sec = sb.st_mtime; +#endif + } else + syswarn(0, errno, "Cannot obtain file stats %s", fnm); + } + + /* + * set the times + */ +#if HAVE_LUTIMES + if (lutimes(fnm, tv) == 0) + return; + if (errno != ENOSYS) /* XXX linux: lutimes is per-FS */ + goto bad; +#endif + if (slk) + return; + if (utimes(fnm, tv) == -1) + goto bad; + return; +bad: + syswarn(1, errno, "Access/modification time set failed on: %s", fnm); +} + +/* + * set_ids() + * set the uid and gid of a file system node. The change is only + * attempted when the effective uid is 0. + * Return: + * 0 when set (or when not attempted), -1 on failure + */ + +int +set_ids(char *fnm, uid_t uid, gid_t gid) +{ + if (geteuid() == 0) + if (lchown(fnm, uid, gid)) { + (void)fflush(listf); + syswarn(1, errno, "Cannot set file uid/gid of %s", + fnm); + return -1; + } + return 0; +} + +/* + * set_pmode() + * Set file access mode. Only the A_BITS part of mode is applied; a + * failure is reported with syswarn() and not returned to the caller. + */ + +void +set_pmode(char *fnm, mode_t mode) +{ + mode &= A_BITS; + if (lchmod(fnm, mode)) { + (void)fflush(listf); + syswarn(1, errno, "Cannot set permissions on %s", fnm); + } + return; +} + +/* + * set_chflags() + * Set 4.4BSD file flags. The chflags() call is currently compiled + * out with #if 0, so this function does nothing. + */ +void +set_chflags(char *fnm, u_int32_t flags) +{ + +#if 0 + if (chflags(fnm, flags) < 0 && errno != EOPNOTSUPP) + syswarn(1, errno,
"Cannot set file flags on %s", fnm); +#endif + return; +} + +/* + * file_write() + * Write/copy a file (during copy or archive extract). This routine knows + * how to copy files with lseek holes in it. (Which are read as file + * blocks containing all 0's but do not have any file blocks associated + * with the data). Typical examples of these are files created by dbm + * variants (.pag files). While the file size of these files is huge, the + * actual storage is quite small (the files are sparse). The problem is + * the holes read as all zeros so are probably stored on the archive that + * way (there is no way to determine if the file block is really a hole, + * we only know that a file block of all zero's can be a hole). + * At this writing, no major archive format knows how to archive files + * with holes. However, on extraction (or during copy, -rw) we have to + * deal with these files. Without detecting the holes, the files can + * consume a lot of file space if just written to disk. This replacement + * for write when passed the basic allocation size of a file system block, + * uses lseek whenever it detects the input data is all 0 within that + * file block. In more detail, the strategy is as follows: + * While the input is all zero keep doing an lseek. Keep track of when we + * pass over file block boundaries. Only write when we hit a non zero + * input. once we have written a file block, we continue to write it to + * the end (we stop looking at the input). When we reach the start of the + * next file block, start checking for zero blocks again. Working on file + * block boundaries significantly reduces the overhead when copying files + * that are NOT very sparse. This overhead (when compared to a write) is + * almost below the measurement resolution on many systems. Without it, + * files with holes cannot be safely copied.
It does have a side effect as + * it can put holes into files that did not have them before, but that is + * not a problem since the file contents are unchanged (in fact it saves + * file space). (Except on paging files for diskless clients. But since we + * cannot determine one of those files from here, we ignore them). If this + * ever ends up on a system where CTG files are supported and the holes + * are not desired, just do a conditional test in those routines that + * call file_write() and have it call write() instead. BEFORE CLOSING THE + * FILE, make sure to call file_flush() when the last write finishes with + * an empty block. A lot of file systems will not create an lseek hole at + * the end. In this case we drop a single 0 at the end to force the + * trailing 0's in the file. + * ---Parameters--- + * fd: descriptor to write to; the values -PAX_GLF and -PAX_GLL instead + * append the data to gnu_name_string and gnu_link_string + * rem: how many bytes left in this file system block + * isempt: have we written to the file block yet (is it empty) + * sz: basic file block allocation size + * cnt: number of bytes on this write + * str: buffer to write + * name: file name used in the warning messages + * Return: + * number of bytes written, -1 on write (or lseek) error. + */ + +int +file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz, + char *name) +{ + char *pt; + char *end; + int wcnt; + char *st = str; + char **strp; + size_t *lenp; + + /* + * while we have data to process + */ + while (cnt) { + if (!*rem) { + /* + * We are now at the start of file system block again + * (or what we think one is...).
start looking for + * empty blocks again + */ + *isempt = 1; + *rem = sz; + } + + /* + * only examine up to the end of the current file block or + * remaining characters to write, whatever is smaller + */ + wcnt = MIN(cnt, *rem); + cnt -= wcnt; + *rem -= wcnt; + if (*isempt) { + /* + * have not written to this block yet, so we keep + * looking for zero's + */ + pt = st; + end = st + wcnt; + + /* + * look for a zero filled buffer + */ + while ((pt < end) && (*pt == '\0')) + ++pt; + + if (pt == end) { + /* + * skip, buf is empty so far + */ + if (fd > -1 && + lseek(fd, (off_t)wcnt, SEEK_CUR) < 0) { + syswarn(1, errno, "File seek on %s", + name); + return -1; + } + st = pt; + continue; + } + /* + * drat, the buf is not zero filled + */ + *isempt = 0; + } + + /* + * have non-zero data in this file system block, have to write + */ + switch (fd) { + case -PAX_GLF: + strp = &gnu_name_string; + lenp = &gnu_name_length; + break; + case -PAX_GLL: + strp = &gnu_link_string; + lenp = &gnu_link_length; + break; + default: + strp = NULL; + lenp = NULL; + break; + } + if (strp) { + char *nstr = *strp ? realloc(*strp, *lenp + wcnt + 1) : + malloc(wcnt + 1); + if (nstr == NULL) { + tty_warn(1, "Out of memory"); + return -1; + } + (void)strlcpy(&nstr[*lenp], st, wcnt + 1); + *strp = nstr; + *lenp += wcnt; + } else if (xwrite(fd, st, wcnt) != wcnt) { + syswarn(1, errno, "Failed write to file %s", name); + return -1; + } + st += wcnt; + } + return st - str; +} + +/* + * file_flush() + * when the last file block in a file is zero, many file systems will not + * let us create a hole at the end. To get the last block with zeros, we + * write the last BYTE with a zero (back up one byte and write a zero). + */ + +void +file_flush(int fd, char *fname, int isempt) +{ + static char blnk[] = "\0"; + + /* + * silly test, but make sure we are only called when the last block is + * filled with all zeros. 
+ */ + if (!isempt) + return; + + /* + * move back one byte and write a zero + */ + if (lseek(fd, (off_t)-1, SEEK_CUR) < 0) { + syswarn(1, errno, "Failed seek on file %s", fname); + return; + } + + if (write_with_restart(fd, blnk, 1) < 0) + syswarn(1, errno, "Failed write to file %s", fname); + return; +} + +/* + * rdfile_close() + * close a file we have been reading (to copy or archive). If we have to + * reset access time (tflag) do so (the times are stored in arcn). + * *fd is set to -1 once the descriptor has been closed. + */ + +void +rdfile_close(ARCHD *arcn, int *fd) +{ + /* + * make sure the file is open + */ + if (*fd < 0) + return; + + (void)close(*fd); + *fd = -1; + if (!tflag) + return; + + /* + * user wants last access time reset + */ + set_ftime(arcn->org_name, arcn->sb.st_mtime, arcn->sb.st_atime, 1, 0); + return; +} + +/* + * set_crc() + * read a file to calculate its crc (here a plain sum of the byte values + * read). This is a real drag. Archive formats that have this, end up + * reading the file twice (we have to write the header WITH the crc + * before writing the file contents). Oh well... + * Return: + * 0 if was able to calculate the crc (also when fd is negative, in + * which case arcn->crc is set to 0), -1 otherwise + */ + +int +set_crc(ARCHD *arcn, int fd) +{ + int i; + int res; + off_t cpcnt = 0L; + u_long size; + unsigned long crc = 0L; + char tbuf[FILEBLK]; + struct stat sb; + + if (fd < 0) { + /* + * hmm, no fd, should never happen. well no crc then. + */ + arcn->crc = 0L; + return 0; + } + + if ((size = (u_long)arcn->sb.st_blksize) > (u_long)sizeof(tbuf)) + size = (u_long)sizeof(tbuf); + + /* + * read all the bytes we think that there are in the file. If the user + * is trying to archive an active file, forget this file. + */ + for(;;) { + if ((res = read(fd, tbuf, size)) <= 0) + break; + cpcnt += res; + for (i = 0; i < res; ++i) + crc += (tbuf[i] & 0xff); + } + + /* + * safety check. we want to avoid archiving files that are active as + * they can create inconsistent archive copies.
+ */ + if (cpcnt != arcn->sb.st_size) + tty_warn(1, "File changed size %s", arcn->org_name); + else if (fstat(fd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + tty_warn(1, "File %s was modified during read", arcn->org_name); + else if (lseek(fd, (off_t)0L, SEEK_SET) < 0) + syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); + else { + arcn->crc = crc; + return 0; + } + return -1; +} diff --git a/bin/pax/ftree.c b/bin/pax/ftree.c new file mode 100644 index 0000000..0d45429 --- /dev/null +++ b/bin/pax/ftree.c @@ -0,0 +1,741 @@ +/* $NetBSD: ftree.c,v 1.42 2012/09/27 00:44:59 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 2001 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Luke Mewburn of Wasabi Systems. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)ftree.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: ftree.c,v 1.42 2012/09/27 00:44:59 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "ftree.h" +#include "extern.h" +#include "options.h" +#ifndef SMALL +#include "mtree.h" +#endif /* SMALL */ + +/* + * routines to interface with the fts library function. + * + * file args supplied to pax are stored on a single linked list (of type FTREE) + * and given to fts to be processed one at a time. pax "selects" files from + * the expansion of each arg into the corresponding file tree (if the arg is a + * directory, otherwise the node itself is just passed to pax). The selection + * is modified by the -n and -u flags. The user is informed when a specific + * file arg does not generate any selected files. -n keeps expanding the file + * tree arg until one of its files is selected, then skips to the next file + * arg. 
when the user does not supply the file trees as command line args to + * pax, they are read from stdin + */ + +static FTS *ftsp = NULL; /* current FTS handle */ +static int ftsopts; /* options to be used on fts_open */ +static char *farray[2]; /* array for passing each arg to fts */ +static FTREE *fthead = NULL; /* head of linked list of file args */ +static FTREE *fttail = NULL; /* tail of linked list of file args */ +static FTREE *ftcur = NULL; /* current file arg being processed */ +static FTSENT *ftent = NULL; /* current file tree entry */ +static int ftree_skip; /* when set skip to next file arg */ +#ifndef SMALL +static NODE *ftnode = NULL; /* mtree(8) specfile; used by -M */ +#endif /* SMALL */ + +static int ftree_arg(void); + +#define FTS_ERRNO(x) (x)->fts_errno + +/* + * ftree_start() + * initialize the options passed to fts_open() during this run of pax + * options are based on the selection of pax options by the user + * fts_start() also calls fts_arg() to open the first valid file arg. We + * also attempt to reset directory access times when -t (tflag) is set. + * Return: + * 0 if there is at least one valid file arg to process, -1 otherwise + */ + +int +ftree_start(void) +{ + +#ifndef SMALL + /* + * if -M is given, the list of filenames on stdin is actually + * an mtree(8) specfile, so parse the specfile into a NODE * + * tree at ftnode, for use by next_file() + */ + if (Mflag) { + if (fthead != NULL) { + tty_warn(1, + "The -M flag is only supported when reading file list from stdin"); + return -1; + } + ftnode = spec(stdin); + if (ftnode != NULL && + (ftnode->type != F_DIR || strcmp(ftnode->name, ".") != 0)) { + tty_warn(1, + "First node of specfile is not `.' directory"); + return -1; + } + return 0; + } +#endif /* SMALL */ + + /* + * set up the operation mode of fts, open the first file arg. 
We must + * use FTS_NOCHDIR, as the user may have to open multiple archives and + * if fts did a chdir off into the boondocks, we may create an archive + * volume in an place where the user did not expect to. + */ + ftsopts = FTS_NOCHDIR; + + /* + * optional user flags that effect file traversal + * -H command line symlink follow only (half follow) + * -L follow sylinks (logical) + * -P do not follow sylinks (physical). This is the default. + * -X do not cross over mount points + * -t preserve access times on files read. + * -n select only the first member of a file tree when a match is found + * -d do not extract subtrees rooted at a directory arg. + */ + if (Lflag) + ftsopts |= FTS_LOGICAL; + else + ftsopts |= FTS_PHYSICAL; + if (Hflag) + ftsopts |= FTS_COMFOLLOW; + if (Xflag) + ftsopts |= FTS_XDEV; + + if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) { + tty_warn(1, "Unable to allocate memory for file name buffer"); + return -1; + } + + if (ftree_arg() < 0) + return -1; + if (tflag && (atdir_start() < 0)) + return -1; + return 0; +} + +/* + * ftree_add() + * add the arg to the linked list of files to process. Each will be + * processed by fts one at a time + * Return: + * 0 if added to the linked list, -1 if failed + */ + +int +ftree_add(char *str, int isdir) +{ + FTREE *ft; + int len; + + /* + * simple check for bad args + */ + if ((str == NULL) || (*str == '\0')) { + tty_warn(0, "Invalid file name argument"); + return -1; + } + + /* + * allocate FTREE node and add to the end of the linked list (args are + * processed in the same order they were passed to pax). Get rid of any + * trailing / the user may pass us. (watch out for / by itself). 
+ */ + if ((ft = (FTREE *)malloc(sizeof(FTREE))) == NULL) { + tty_warn(0, "Unable to allocate memory for filename"); + return -1; + } + + if (((len = strlen(str) - 1) > 0) && (str[len] == '/')) + str[len] = '\0'; + ft->fname = str; + ft->refcnt = -isdir; + ft->fow = NULL; + if (fthead == NULL) { + fttail = fthead = ft; + return 0; + } + fttail->fow = ft; + fttail = ft; + return 0; +} + +/* + * ftree_sel() + * this entry has been selected by pax. bump up reference count and handle + * -n and -d processing. + */ + +void +ftree_sel(ARCHD *arcn) +{ + /* + * set reference bit for this pattern. This linked list is only used + * when file trees are supplied pax as args. The list is not used when + * the trees are read from stdin. + */ + if (ftcur != NULL) + ftcur->refcnt = 1; + + /* + * if -n we are done with this arg, force a skip to the next arg when + * pax asks for the next file in next_file(). + * if -M we don't use fts(3), so the rest of this function is moot. + * if -d we tell fts only to match the directory (if the arg is a dir) + * and not the entire file tree rooted at that point. + */ + if (nflag) + ftree_skip = 1; + + if (Mflag || !dflag || (arcn->type != PAX_DIR)) + return; + + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_chk() + * called at end on pax execution. Prints all those file args that did not + * have a selected member (reference count still 0) + */ + +void +ftree_chk(void) +{ + FTREE *ft; + int wban = 0; + + /* + * make sure all dir access times were reset. + */ + if (tflag) + atdir_end(); + + /* + * walk down list and check reference count. Print out those members + * that never had a match + */ + for (ft = fthead; ft != NULL; ft = ft->fow) { + if (ft->refcnt != 0) + continue; + if (wban == 0) { + tty_warn(1, + "WARNING! These file names were not selected:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", ft->fname); + } +} + +/* + * ftree_arg() + * Get the next file arg for fts to process. 
Can be from either the linked + * list or read from stdin when the user did not them as args to pax. Each + * arg is processed until the first successful fts_open(). + * Return: + * 0 when the next arg is ready to go, -1 if out of file args (or EOF on + * stdin). + */ + +static int +ftree_arg(void) +{ + /* + * close off the current file tree + */ + if (ftsp != NULL) { + (void)fts_close(ftsp); + ftsp = NULL; + ftent = NULL; + } + + /* + * keep looping until we get a valid file tree to process. Stop when we + * reach the end of the list (or get an eof on stdin) + */ + for(;;) { + if (fthead == NULL) { + int i, c = EOF; + /* + * the user didn't supply any args, get the file trees + * to process from stdin; + */ + for (i = 0; i < PAXPATHLEN + 2;) { + c = getchar(); + if (c == EOF) + break; + else if (c == sep) { + if (i != 0) + break; + } else + farray[0][i++] = c; + } + if (i == 0) + return -1; + farray[0][i] = '\0'; + } else { + /* + * the user supplied the file args as arguments to pax + */ + if (ftcur == NULL) + ftcur = fthead; + else if ((ftcur = ftcur->fow) == NULL) + return -1; + + if (ftcur->refcnt < 0) { + /* + * chdir entry. + * Change directory and retry loop. + */ + if (ar_dochdir(ftcur->fname)) + return (-1); + continue; + } + farray[0] = ftcur->fname; + } + + /* + * watch it, fts wants the file arg stored in a array of char + * ptrs, with the last one a null. we use a two element array + * and set farray[0] to point at the buffer with the file name + * in it. We cannot pass all the file args to fts at one shot + * as we need to keep a handle on which file arg generates what + * files (the -n and -d flags need this). If the open is + * successful, return a 0. + */ + if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL) + break; + } + return 0; +} + +/* + * next_file() + * supplies the next file to process in the supplied archd structure. + * Return: + * 0 when contents of arcn have been set with the next file, -1 when done. 
+ */ + +int +next_file(ARCHD *arcn) +{ +#ifndef SMALL + static char curdir[PAXPATHLEN+2], curpath[PAXPATHLEN+2]; + static int curdirlen; + + struct stat statbuf; + FTSENT Mftent; +#endif /* SMALL */ + int cnt; + time_t atime, mtime; + char *curlink; +#define MFTENT_DUMMY_DEV UINT_MAX + + curlink = NULL; +#ifndef SMALL + /* + * if parsing an mtree(8) specfile, build up `dummy' ftsent + * from specfile info, and jump below to complete setup of arcn. + */ + if (Mflag) { + int skipoptional; + + next_ftnode: + skipoptional = 0; + if (ftnode == NULL) /* tree is empty */ + return (-1); + + /* get current name */ + if (snprintf(curpath, sizeof(curpath), "%s%s%s", + curdir, curdirlen ? "/" : "", ftnode->name) + >= (int)sizeof(curpath)) { + tty_warn(1, "line %lu: %s: %s", (u_long)ftnode->lineno, + curdir, strerror(ENAMETOOLONG)); + return (-1); + } + ftnode->flags |= F_VISIT; /* mark node visited */ + + /* construct dummy FTSENT */ + Mftent.fts_path = curpath; + Mftent.fts_statp = &statbuf; + Mftent.fts_pointer = ftnode; + ftent = &Mftent; + /* look for existing file */ + if (lstat(Mftent.fts_path, &statbuf) == -1) { + if (ftnode->flags & F_OPT) + skipoptional = 1; + + /* missing: fake up stat info */ + memset(&statbuf, 0, sizeof(statbuf)); + statbuf.st_dev = MFTENT_DUMMY_DEV; + statbuf.st_ino = ftnode->lineno; + statbuf.st_size = 0; +#define NODETEST(t, m) \ + if (!(t)) { \ + tty_warn(1, "line %lu: %s: %s not specified", \ + (u_long)ftnode->lineno, \ + ftent->fts_path, m); \ + return -1; \ + } + statbuf.st_mode = nodetoino(ftnode->type); + NODETEST(ftnode->flags & F_TYPE, "type"); + NODETEST(ftnode->flags & F_MODE, "mode"); + if (!(ftnode->flags & F_TIME)) + statbuf.st_mtime = starttime; + NODETEST(ftnode->flags & (F_GID | F_GNAME), "group"); + NODETEST(ftnode->flags & (F_UID | F_UNAME), "user"); + if (ftnode->type == F_BLOCK || ftnode->type == F_CHAR) + NODETEST(ftnode->flags & F_DEV, + "device number"); + if (ftnode->type == F_LINK) + NODETEST(ftnode->flags & F_SLINK, 
"symlink"); + /* don't require F_FLAGS or F_SIZE */ +#undef NODETEST + } else { + if (ftnode->flags & F_TYPE && nodetoino(ftnode->type) + != (statbuf.st_mode & S_IFMT)) { + tty_warn(1, + "line %lu: %s: type mismatch: specfile %s, tree %s", + (u_long)ftnode->lineno, ftent->fts_path, + inotype(nodetoino(ftnode->type)), + inotype(statbuf.st_mode)); + return -1; + } + if (ftnode->type == F_DIR && (ftnode->flags & F_OPT)) + skipoptional = 1; + } + /* + * override settings with those from specfile + */ + if (ftnode->flags & F_MODE) { + statbuf.st_mode &= ~ALLPERMS; + statbuf.st_mode |= (ftnode->st_mode & ALLPERMS); + } + if (ftnode->flags & (F_GID | F_GNAME)) + statbuf.st_gid = ftnode->st_gid; + if (ftnode->flags & (F_UID | F_UNAME)) + statbuf.st_uid = ftnode->st_uid; +#if HAVE_STRUCT_STAT_ST_FLAGS + if (ftnode->flags & F_FLAGS) + statbuf.st_flags = ftnode->st_flags; +#endif + if (ftnode->flags & F_TIME) +#if BSD4_4 && !HAVE_NBTOOL_CONFIG_H + statbuf.st_mtimespec = ftnode->st_mtimespec; +#else + statbuf.st_mtime = ftnode->st_mtimespec.tv_sec; +#endif + if (ftnode->flags & F_DEV) + statbuf.st_rdev = ftnode->st_rdev; + if (ftnode->flags & F_SLINK) + curlink = ftnode->slink; + /* ignore F_SIZE */ + + /* + * find next node + */ + if (ftnode->type == F_DIR && ftnode->child != NULL) { + /* directory with unseen child */ + ftnode = ftnode->child; + curdirlen = strlcpy(curdir, curpath, sizeof(curdir)); + } else do { + if (ftnode->next != NULL) { + /* next node at current level */ + ftnode = ftnode->next; + } else { /* move back to parent */ + /* reset time only on first cd.. 
*/ + if (Mftent.fts_pointer == ftnode && tflag && + (get_atdir(MFTENT_DUMMY_DEV, ftnode->lineno, + &mtime, &atime) == 0)) { + set_ftime(ftent->fts_path, + mtime, atime, 1, 0); + } + ftnode = ftnode->parent; + if (ftnode->parent == ftnode) + ftnode = NULL; + else { + curdirlen -= strlen(ftnode->name) + 1; + curdir[curdirlen] = '\0'; + } + } + } while (ftnode != NULL && ftnode->flags & F_VISIT); + if (skipoptional) /* skip optional entries */ + goto next_ftnode; + goto got_ftent; + } +#endif /* SMALL */ + + /* + * ftree_sel() might have set the ftree_skip flag if the user has the + * -n option and a file was selected from this file arg tree. (-n says + * only one member is matched for each pattern) ftree_skip being 1 + * forces us to go to the next arg now. + */ + if (ftree_skip) { + /* + * clear and go to next arg + */ + ftree_skip = 0; + if (ftree_arg() < 0) + return -1; + } + + if (ftsp == NULL) + return -1; + /* + * loop until we get a valid file to process + */ + for(;;) { + if ((ftent = fts_read(ftsp)) == NULL) { + /* + * out of files in this tree, go to next arg, if none + * we are done + */ + if (ftree_arg() < 0) + return -1; + continue; + } + + /* + * handle each type of fts_read() flag + */ + switch(ftent->fts_info) { + case FTS_D: + case FTS_DEFAULT: + case FTS_F: + case FTS_SL: + case FTS_SLNONE: + /* + * these are all ok + */ + break; + case FTS_DP: + /* + * already saw this directory. If the user wants file + * access times reset, we use this to restore the + * access time for this directory since this is the + * last time we will see it in this file subtree + * remember to force the time (this is -t on a read + * directory, not a created directory). 
+ */ + if (!tflag || (get_atdir( + ftent->fts_statp->st_dev, ftent->fts_statp->st_ino, + &mtime, &atime) < 0)) + continue; + set_ftime(ftent->fts_path, mtime, atime, 1, 0); + continue; + case FTS_DC: + /* + * fts claims a file system cycle + */ + tty_warn(1,"File system cycle found at %s", + ftent->fts_path); + continue; + case FTS_DNR: + syswarn(1, FTS_ERRNO(ftent), + "Unable to read directory %s", ftent->fts_path); + continue; + case FTS_ERR: + syswarn(1, FTS_ERRNO(ftent), + "File system traversal error"); + continue; + case FTS_NS: + case FTS_NSOK: + syswarn(1, FTS_ERRNO(ftent), + "Unable to access %s", ftent->fts_path); + continue; + } + +#ifndef SMALL + got_ftent: +#endif /* SMALL */ + /* + * ok got a file tree node to process. copy info into arcn + * structure (initialize as required) + */ + arcn->skip = 0; + arcn->pad = 0; + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->sb = *(ftent->fts_statp); + + /* + * file type based set up and copy into the arcn struct + * SIDE NOTE: + * we try to reset the access time on all files and directories + * we may read when the -t flag is specified. files are reset + * when we close them after copying. we reset the directories + * when we are done with their file tree (we also clean up at + * end in case we cut short a file tree traversal). However + * there is no way to reset access times on symlinks. + */ + switch(S_IFMT & arcn->sb.st_mode) { + case S_IFDIR: + arcn->type = PAX_DIR; + if (!tflag) + break; + add_atdir(ftent->fts_path, arcn->sb.st_dev, + arcn->sb.st_ino, arcn->sb.st_mtime, + arcn->sb.st_atime); + break; + case S_IFCHR: + arcn->type = PAX_CHR; + break; + case S_IFBLK: + arcn->type = PAX_BLK; + break; + case S_IFREG: + /* + * only regular files with have data to store on the + * archive. all others will store a zero length skip. + * the skip field is used by pax for actual data it has + * to read (or skip over). 
+ */ + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + case S_IFLNK: + arcn->type = PAX_SLK; + if (curlink != NULL) { + cnt = strlcpy(arcn->ln_name, curlink, + sizeof(arcn->ln_name)); + /* + * have to read the symlink path from the file + */ + } else if ((cnt = + readlink(ftent->fts_path, arcn->ln_name, + sizeof(arcn->ln_name) - 1)) < 0) { + syswarn(1, errno, "Unable to read symlink %s", + ftent->fts_path); + continue; + } + /* + * set link name length, watch out readlink does not + * always null terminate the link path + */ + arcn->ln_name[cnt] = '\0'; + arcn->ln_nlen = cnt; + break; +#ifdef S_IFSOCK + case S_IFSOCK: + /* + * under BSD storing a socket is senseless but we will + * let the format specific write function make the + * decision of what to do with it. + */ + arcn->type = PAX_SCK; + break; +#endif + case S_IFIFO: + arcn->type = PAX_FIF; + break; + } + break; + } + + /* + * copy file name, set file name length + */ + arcn->nlen = strlcpy(arcn->name, ftent->fts_path, sizeof(arcn->name)); + arcn->org_name = arcn->fts_name; + strlcpy(arcn->fts_name, ftent->fts_path, sizeof arcn->fts_name); + if (strcmp(NM_CPIO, argv0) == 0) { + /* + * cpio does *not* descend directories listed in the + * arguments, unlike pax/tar, so needs special handling + * here. failure to do so results in massive amounts + * of duplicated files in the output. We kill fts after + * the first name is extracted, what a waste. + */ + ftcur->refcnt = 1; + (void)ftree_arg(); + } + return 0; +} diff --git a/bin/pax/ftree.h b/bin/pax/ftree.h new file mode 100644 index 0000000..490cf51 --- /dev/null +++ b/bin/pax/ftree.h @@ -0,0 +1,48 @@ +/* $NetBSD: ftree.h,v 1.5 2003/10/13 07:41:22 agc Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ftree.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Data structure used by the ftree.c routines to store the file args to be + * handed to fts(). It keeps a reference count of which args generated a + * "selected" member + */ + +typedef struct ftree { + char *fname; /* file tree name */ + int refcnt; /* has tree had a selected file? 
*/ + struct ftree *fow; /* pointer to next entry on list */ +} FTREE; diff --git a/bin/pax/gen_subs.c b/bin/pax/gen_subs.c new file mode 100644 index 0000000..9228c69 --- /dev/null +++ b/bin/pax/gen_subs.c @@ -0,0 +1,437 @@ +/* $NetBSD: gen_subs.c,v 1.37 2018/11/30 00:53:11 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)gen_subs.c 8.1 (Berkeley) 5/31/93"; +#else +__RCSID("$NetBSD: gen_subs.c,v 1.37 2018/11/30 00:53:11 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pax.h" +#include "extern.h" + +/* + * a collection of general purpose subroutines used by pax + */ + +/* + * constants used by ls_list() when printing out archive members + */ +#define MODELEN 20 +#define DATELEN 64 +#define SIXMONTHS ((DAYSPERNYEAR / 2) * SECSPERDAY) +#define CURFRMT "%b %e %H:%M" +#define OLDFRMT "%b %e %Y" +#ifndef UT_NAMESIZE +#define UT_NAMESIZE 8 +#endif +#define UT_GRPSIZE 6 + +/* + * convert time to string + */ +static void +formattime(char *buf, size_t buflen, time_t when) +{ + int error; + struct tm tm; + (void)localtime_r(&when, &tm); + + if (when + SIXMONTHS <= time(NULL)) + error = strftime(buf, buflen, OLDFRMT, &tm); + else + error = strftime(buf, buflen, CURFRMT, &tm); + + if (error == 0) + buf[0] = '\0'; +} + +/* + * ls_list() + * list the members of an archive in ls format + */ + +void +ls_list(ARCHD *arcn, time_t now, FILE *fp) +{ + struct stat *sbp; + char f_mode[MODELEN]; + char f_date[DATELEN]; + const char *user, *group; + + /* + * if not verbose, just print the file 
name + */ + if (!vflag) { + (void)fprintf(fp, "%s\n", arcn->name); + (void)fflush(fp); + return; + } + + /* + * user wants long mode + */ + sbp = &(arcn->sb); + strmode(sbp->st_mode, f_mode); + + /* + * time format based on age compared to the time pax was started. + */ + formattime(f_date, sizeof(f_date), arcn->sb.st_mtime); + /* + * print file mode, link count, uid, gid and time + */ + user = user_from_uid(sbp->st_uid, 0); + group = group_from_gid(sbp->st_gid, 0); + (void)fprintf(fp, "%s%2lu %-*s %-*s ", f_mode, + (unsigned long)sbp->st_nlink, + UT_NAMESIZE, user ? user : "", UT_GRPSIZE, group ? group : ""); + + /* + * print device id's for devices, or sizes for other nodes + */ + if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK)) + (void)fprintf(fp, "%4lu,%4lu ", (long) MAJOR(sbp->st_rdev), + (long) MINOR(sbp->st_rdev)); + else { + (void)fprintf(fp, OFFT_FP("9") " ", (OFFT_T)sbp->st_size); + } + + /* + * print name and link info for hard and soft links + */ + (void)fprintf(fp, "%s %s", f_date, arcn->name); + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + (void)fprintf(fp, " == %s\n", arcn->ln_name); + else if (arcn->type == PAX_SLK) + (void)fprintf(fp, " -> %s\n", arcn->ln_name); + else + (void)fputc('\n', fp); + (void)fflush(fp); +} + +/* + * tty_ls() + * print a short summary of file to tty. + */ + +void +ls_tty(ARCHD *arcn) +{ + char f_date[DATELEN]; + char f_mode[MODELEN]; + + formattime(f_date, sizeof(f_date), arcn->sb.st_mtime); + strmode(arcn->sb.st_mode, f_mode); + tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name); + return; +} + +void +safe_print(const char *str, FILE *fp) +{ + char visbuf[5]; + const char *cp; + + /* + * if printing to a tty, use vis(3) to print special characters. + */ + if (isatty(fileno(fp))) { + for (cp = str; *cp; cp++) { + (void)vis(visbuf, cp[0], VIS_CSTYLE, cp[1]); + (void)fputs(visbuf, fp); + } + } else { + (void)fputs(str, fp); + } +} + +/* + * asc_u32() + * convert hex/octal character string into a uint32_t. 
We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * uint32_t value + */ + +uint32_t +asc_u32(char *str, int len, int base) +{ + char *stop; + uint32_t tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return tval; +} + +/* + * u32_asc() + * convert an uintmax_t into an hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +u32_asc(uintmax_t val, char *str, int len, int base) +{ + char *pt; + uint32_t digit; + uintmax_t p; + + p = val & TOP_HALF; + if (p && p != TOP_HALF) + return -1; + + val &= BOTTOM_HALF; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). 
Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == (u_long)0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return -1; + return 0; +} + +/* + * asc_umax() + * convert hex/octal/base-256 value into a uintmax. + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * uintmax_t value; UINTMAX_MAX for overflow/negative + */ + +uintmax_t +asc_umax(char *str, int len, int base) +{ + char *stop; + uintmax_t tval = 0; + + stop = str + len; + + /* + * if the highest bit of first byte is set, it's base-256 encoded + * (base-256 is basically (n-1)-bit big endian signed + */ + if (str < stop && (*str & 0x80)) { + /* + * uintmax_t can't be negative, so fail on negative numbers + */ + if (*str & 0x40) + return UINTMAX_MAX; + + tval = *str++ & 0x3f; + while (str < stop) { + /* + * check for overflow + */ + if (tval > (UINTMAX_MAX/256)) + return UINTMAX_MAX; + tval = (tval << 8) | ((*str++) & 0xFF); + } + + return tval; + } + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + 
(*str++ - '0'); + } + return tval; +} + +/* + * umax_asc() + * convert an uintmax_t into a hex/oct ascii string. pads with + * LEADING ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +umax_asc(uintmax_t val, char *str, int len, int base) +{ + char *pt; + uintmax_t digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == 0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return -1; + return 0; +} + +int +check_Aflag(void) +{ + + if (Aflag > 0) + return 1; + if (Aflag == 0) { + Aflag = -1; + tty_warn(0, + "Removing leading / from absolute path names in the archive"); + } + return 0; +} diff --git a/bin/pax/getoldopt.c b/bin/pax/getoldopt.c new file mode 100644 index 0000000..2d02e7e --- /dev/null +++ b/bin/pax/getoldopt.c @@ -0,0 +1,92 @@ +/* $NetBSD: getoldopt.c,v 1.23 2012/08/09 11:05:59 christos Exp $ */ + +/* + * Plug-compatible replacement for getopt() for parsing tar-like + * arguments. If the first argument begins with "-", it uses getopt; + * otherwise, it uses the old rules used by tar, dump, and ps. + * + * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) and placed + * in the Public Domain for your edification and enjoyment. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +__RCSID("$NetBSD: getoldopt.c,v 1.23 2012/08/09 11:05:59 christos Exp $"); +#endif /* not lint */ + +#if HAVE_NBTOOL_CONFIG_H +#include "compat_getopt.h" +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" + +int +getoldopt(int argc, char **argv, const char *optstring, + struct option *longopts, int *idx) +{ + static char *key; /* Points to next keyletter */ + static char use_getopt; /* !=0 if argv[1][0] was '-' */ + char c; + char *place; + + optarg = NULL; + + if (key == NULL) { /* First time */ + if (argc < 2) return -1; + key = argv[1]; + if (*key == '-') + use_getopt++; + else + optind = 2; + } + + c = '\0'; + if (!use_getopt) { + c = *key++; + if (c == '\0') { + key--; + use_getopt = 1; + } + } + if (use_getopt) { + if (longopts != NULL) { + return getopt_long(argc, argv, optstring, + longopts, idx); + } else { + return getopt(argc, argv, optstring); + } + } + + place = strchr(optstring, c); + + if (place == NULL || c == ':') { + fprintf(stderr, "%s: unknown option %c\n", argv[0], c); + return '?'; + } + + place++; + if (*place == ':') { + if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + fprintf(stderr, "%s: %c argument missing\n", + argv[0], c); + return '?'; + } + } + + return c; +} diff --git a/bin/pax/options.c b/bin/pax/options.c new file mode 100644 index 0000000..74e1480 --- /dev/null +++ b/bin/pax/options.c @@ -0,0 +1,2229 @@ +/* $NetBSD: options.c,v 1.118 2015/12/19 18:45:52 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)options.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: options.c,v 1.118 2015/12/19 18:45:52 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#if HAVE_NBTOOL_CONFIG_H +#include "compat_getopt.h" +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "options.h" +#include "cpio.h" +#include "tar.h" +#include "extern.h" +#ifndef SMALL +#include "mtree.h" +#endif /* SMALL */ + +/* + * Routines which handle command line options + */ +struct stat tst; /* Timestamp to set if non-0 */ + +static int nopids; /* tar mode: suppress "pids" for -p option */ +static char flgch[] = FLGCH; /* list of all possible flags (pax) */ +static OPLIST *ophead = NULL; /* head for format specific options -x */ +static OPLIST *optail = NULL; /* option tail */ + +static int opt_add(const char *); +static int no_op(void); +static void printflg(unsigned int); +static int c_frmt(const void *, const void *); +static off_t str_offt(char *); +static char *get_line(FILE *fp); +#ifndef SMALL +static int set_tstamp(const char *, struct stat *); +#endif +static void pax_options(int, char **); +__dead static void pax_usage(void); +static void tar_options(int, char **); +__dead static void tar_usage(void); +#ifndef NO_CPIO +static void cpio_options(int, char **); +__dead static void cpio_usage(void); +#endif + +/* errors from get_line */ +#define GETLINE_FILE_CORRUPT 1 +#define GETLINE_OUT_OF_MEM 2 +static int get_line_error; + +#define BZIP2_CMD "bzip2" /* command to run as bzip2 */ +#define GZIP_CMD "gzip" /* command to run as gzip */ +#define XZ_CMD "xz" /* command to run as xz */ +#define COMPRESS_CMD "compress" /* command to run as compress */ + +/* + * Long options. 
/*
 * Long options.
 *
 * Identifiers used as the "val" member of the struct option tables in this
 * file for long options that have no single-letter equivalent.  Several of
 * them are only referenced from table entries that are currently compiled
 * out.
 */
#define	OPT_USE_COMPRESS_PROGRAM	0
#define	OPT_CHECKPOINT			1
#define	OPT_UNLINK			2
#define	OPT_HELP			3
#define	OPT_ATIME_PRESERVE		4
#define	OPT_IGNORE_FAILED_READ		5
#define	OPT_REMOVE_FILES		6
#define	OPT_NULL			7
#define	OPT_TOTALS			8
#define	OPT_VERSION			9
#define	OPT_EXCLUDE			10
#define	OPT_BLOCK_COMPRESS		11
#define	OPT_NORECURSE			12
#define	OPT_FORCE_LOCAL			13
#define	OPT_INSECURE			14
#define	OPT_STRICT			15
#define	OPT_SPARSE			16
#define	OPT_XZ				17
#define	OPT_GNU				18
#define	OPT_TIMESTAMP			19
#if !HAVE_NBTOOL_CONFIG_H
#define	OPT_CHROOT			20
#endif

/*
 * Format specific routine table - MUST BE IN SORTED ORDER BY NAME
 * (see pax.h for description of each function)
 *
 * The table is searched with bsearch() by the -x (pax) and -H (cpio)
 * option handlers, which is why the name ordering matters.
 *
 *	name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read,
 *	read, end_read, st_write, write, end_write, trail,
 *	subtrail, rd_data, wr_data, options
 */

FSUB fsub[] = {
#ifndef NO_CPIO
/* 0: OLD BINARY CPIO */
	{ "bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd,
	bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, NULL,
	cpio_subtrail, rd_wrfile, wr_rdfile, bad_opt },

/* 1: OLD OCTAL CHARACTER CPIO */
	{ "cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd,
	cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, NULL,
	cpio_subtrail, rd_wrfile, wr_rdfile, bad_opt },

/* 2: SVR4 HEX CPIO */
	{ "sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd,
	vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, NULL,
	cpio_subtrail, rd_wrfile, wr_rdfile, bad_opt },

/* 3: SVR4 HEX CPIO WITH CRC */
	{ "sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, crc_id, crc_strd,
	vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, NULL,
	cpio_subtrail, rd_wrfile, wr_rdfile, bad_opt },
#endif
/* 4: OLD TAR */
	{ "tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op,
	tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, tar_trail,
	NULL, rd_wrfile, wr_rdfile, tar_opt },

/* 5: POSIX USTAR */
	{ "ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, ustar_strd,
	ustar_rd, tar_endrd, ustar_stwr, ustar_wr, tar_endwr, tar_trail,
	NULL, rd_wrfile, wr_rdfile, bad_opt }
};

/*
 * Indices into fsub[].  The numbers in the comments inside the table apply
 * when the cpio formats are compiled in; with NO_CPIO defined the table only
 * holds the two tar entries, so they move to slots 0 and 1.
 */
#ifndef NO_CPIO
#define F_BCPIO		0	/* old binary cpio format */
#define F_CPIO		1	/* old octal character cpio format */
#define F_SV4CPIO	2	/* SVR4 hex cpio format */
#define F_SV4CRC	3	/* SVR4 hex with crc cpio format */
#define F_TAR		4	/* old V7 UNIX tar format */
#define F_USTAR		5	/* ustar format */
#else
#define F_TAR		0	/* old V7 UNIX tar format */
#define F_USTAR		1	/* ustar format */
#endif
#define DEFLT		F_USTAR	/* default write format from list above */

/*
 * ford is the archive search order used by get_arc() to determine what kind
 * of archive we are dealing with. This helps to properly id archive formats
 * some formats may be subsets of others....
 *
 * The list is terminated by -1.
 */
int ford[] = {F_USTAR, F_TAR,
#ifndef NO_CPIO
	F_SV4CRC, F_SV4CPIO, F_CPIO, F_BCPIO,
#endif
	-1};

/*
 * filename record separator (pax -0 switches it to '\0')
 */
int sep = '\n';

/*
 * Do we have -C anywhere?  (counted by the tar option parser)
 */
int havechd = 0;
Call the appropriate options + * parser + */ + +void +options(int argc, char **argv) +{ + + /* + * Are we acting like pax, tar or cpio (based on argv[0]) + */ + if ((argv0 = strrchr(argv[0], '/')) != NULL) + argv0++; + else + argv0 = argv[0]; + + if (strstr(argv0, NM_TAR)) { + argv0 = NM_TAR; + tar_options(argc, argv); +#ifndef NO_CPIO + } else if (strstr(argv0, NM_CPIO)) { + argv0 = NM_CPIO; + cpio_options(argc, argv); +#endif + } else { + argv0 = NM_PAX; + pax_options(argc, argv); + } +} + +struct option pax_longopts[] = { + { "insecure", no_argument, 0, + OPT_INSECURE }, + { "force-local", no_argument, 0, + OPT_FORCE_LOCAL }, + { "use-compress-program", required_argument, 0, + OPT_USE_COMPRESS_PROGRAM }, + { "xz", no_argument, 0, + OPT_XZ }, + { "gnu", no_argument, 0, + OPT_GNU }, + { "timestamp", required_argument, 0, + OPT_TIMESTAMP }, + { 0, 0, 0, + 0 }, +}; + +/* + * pax_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. 
If not, complain and exit + */ + +static void +pax_options(int argc, char **argv) +{ + int c; + size_t i; + u_int64_t flg = 0; + u_int64_t bflg = 0; + char *pt; + FSUB tmp; + + /* + * process option flags + */ + while ((c = getopt_long(argc, argv, + "0ab:cdf:ijklno:p:rs:tuvwx:zAB:DE:G:HLMN:OPT:U:VXYZ", + pax_longopts, NULL)) != -1) { + switch (c) { + case '0': + sep = '\0'; + break; + case 'a': + /* + * append + */ + flg |= AF; + break; + case 'b': + /* + * specify blocksize + */ + flg |= BF; + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + tty_warn(1, "Invalid block size %s", optarg); + pax_usage(); + } + break; + case 'c': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'd': + /* + * match only dir on extract, not the subtree at dir + */ + dflag = 1; + flg |= DF; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + flg |= FF; + break; + case 'i': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; + case 'j': + /* + * pass through bzip2 + */ + gzip_program = BZIP2_CMD; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + flg |= KF; + break; + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'n': + /* + * select first match for a pattern only + */ + nflag = 1; + flg |= NF; + break; + case 'o': + /* + * pass format specific options + */ + flg |= OF; + if (opt_add(optarg) < 0) + pax_usage(); + break; + case 'p': + /* + * specify file characteristic options + */ + for (pt = optarg; *pt != '\0'; ++pt) { + switch(*pt) { + case 'a': + /* + * do not preserve access time + */ + patime = 0; + break; + case 'e': + /* + * preserve user id, group id, file + * mode, access/modification times + * and file flags. 
+ */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + pfflags = 1; + break; +#if 0 + case 'f': + /* + * do not preserve file flags + */ + pfflags = 0; + break; +#endif + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * preserve uid/gid + */ + pids = 1; + break; + case 'p': + /* + * preserve file mode bits + */ + pmode = 1; + break; + default: + tty_warn(1, "Invalid -p string: %c", + *pt); + pax_usage(); + break; + } + } + flg |= PF; + break; + case 'r': + /* + * read the archive + */ + flg |= RF; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= SF; + break; + case 't': + /* + * preserve access time on filesystem nodes we read + */ + tflag = 1; + flg |= TF; + break; + case 'u': + /* + * ignore those older files + */ + uflag = 1; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'w': + /* + * write an archive + */ + flg |= WF; + break; + case 'x': + /* + * specify an archive format on write + */ + tmp.name = optarg; + frmt = (FSUB *)bsearch((void *)&tmp, (void *)fsub, + sizeof(fsub)/sizeof(FSUB), sizeof(FSUB), c_frmt); + if (frmt != NULL) { + flg |= XF; + break; + } + tty_warn(1, "Unknown -x format: %s", optarg); + (void)fputs("pax: Known -x formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + (void)fprintf(stderr, " %s", fsub[i].name); + (void)fputs("\n\n", stderr); + pax_usage(); + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'A': + Aflag = 1; + flg |= CAF; + break; + case 'B': + /* + * non-standard option on number of bytes written on a + * single archive volume. 
+ */ + if ((wrlimit = str_offt(optarg)) <= 0) { + tty_warn(1, "Invalid write limit %s", optarg); + pax_usage(); + } + if (wrlimit % BLKMULT) { + tty_warn(1, + "Write limit is not a %d byte multiple", + BLKMULT); + pax_usage(); + } + flg |= CBF; + break; + case 'D': + /* + * On extraction check file inode change time before the + * modification of the file name. Non standard option. + */ + Dflag = 1; + flg |= CDF; + break; + case 'E': + /* + * non-standard limit on read faults + * 0 indicates stop after first error, values + * indicate a limit, "none" try forever + */ + flg |= CEF; + if (strcmp(none, optarg) == 0) + maxflt = -1; + else if ((maxflt = atoi(optarg)) < 0) { + tty_warn(1, + "Error count value must be positive"); + pax_usage(); + } + break; + case 'G': + /* + * non-standard option for selecting files within an + * archive by group (gid or name) + */ + if (grp_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CGF; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + flg |= CHF; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; +#ifdef SMALL + case 'M': + case 'N': + tty_warn(1, "Support for -%c is not compiled in", c); + exit(1); +#else /* !SMALL */ + case 'M': + /* + * Treat list of filenames on stdin as an + * mtree(8) specfile. Non standard option. + */ + Mflag = 1; + flg |= CMF; + break; + case 'N': + /* + * Use alternative directory for user db lookups. + */ + if (!setup_getid(optarg)) { + tty_warn(1, + "Unable to use user and group databases in `%s'", + optarg); + pax_usage(); + } + break; +#endif /* !SMALL */ + case 'O': + /* + * Force one volume. Non standard option. 
+ */ + force_one_volume = 1; + break; + case 'P': + /* + * do NOT follow symlinks (default) + */ + Lflag = 0; + flg |= CPF; + break; + case 'T': + /* + * non-standard option for selecting files within an + * archive by modification time range (lower,upper) + */ + if (trng_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CTF; + break; + case 'U': + /* + * non-standard option for selecting files within an + * archive by user (uid or name) + */ + if (usr_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CUF; + break; + case 'V': + /* + * somewhat verbose operation mode (no listing) + */ + Vflag = 1; + flg |= VSF; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + flg |= CXF; + break; + case 'Y': + /* + * On extraction check file inode change time after the + * modification of the file name. Non standard option. + */ + Yflag = 1; + flg |= CYF; + break; + case 'Z': + /* + * On extraction check modification time after the + * modification of the file name. Non standard option. + */ + Zflag = 1; + flg |= CZF; + break; + case OPT_INSECURE: + secure = 0; + break; + case OPT_FORCE_LOCAL: + forcelocal = 1; + break; + case OPT_USE_COMPRESS_PROGRAM: + gzip_program = optarg; + break; + case OPT_XZ: + gzip_program = XZ_CMD; + break; + case OPT_GNU: + is_gnutar = 1; + break; +#ifndef SMALL + case OPT_TIMESTAMP: + if (set_tstamp(optarg, &tst) == -1) { + tty_warn(1, "Invalid timestamp `%s'", optarg); + tar_usage(); + } + break; +#endif + case '?': + default: + pax_usage(); + break; + } + } + + /* + * figure out the operation mode of pax read,write,extract,copy,append + * or list. check that we have not been given a bogus set of flags + * for the operation mode. 
+ */ + if (ISLIST(flg)) { + act = LIST; + listf = stdout; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + pax_usage(); + if (bflg) { + printflg(flg); + pax_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. + */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[DEFLT]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind], NULL, 0) < 0) + pax_usage(); + break; + case COPY: + if (optind >= argc) { + tty_warn(0, "Destination directory was not supplied"); + pax_usage(); + } + --argc; + dirptr = argv[argc]; + if (mkpath(dirptr) < 0) + exit(1); + /* FALLTHROUGH */ + case ARCHIVE: + case APPND: + for (; optind < argc; optind++) + if (ftree_add(argv[optind], 0) < 0) + pax_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + + +/* + * tar_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. 
If not, complain and exit + */ + +struct option tar_longopts[] = { + { "block-size", required_argument, 0, 'b' }, + { "bunzip2", no_argument, 0, 'j' }, + { "bzip2", no_argument, 0, 'j' }, + { "create", no_argument, 0, 'c' }, /* F */ + /* -e -- no corresponding long option */ + { "file", required_argument, 0, 'f' }, + { "dereference", no_argument, 0, 'h' }, + { "keep-old-files", no_argument, 0, 'k' }, + { "one-file-system", no_argument, 0, 'l' }, + { "modification-time", no_argument, 0, 'm' }, + { "old-archive", no_argument, 0, 'o' }, + { "portability", no_argument, 0, 'o' }, + { "same-permissions", no_argument, 0, 'p' }, + { "preserve-permissions", no_argument, 0, 'p' }, + { "preserve", no_argument, 0, 'p' }, + { "fast-read", no_argument, 0, 'q' }, + { "append", no_argument, 0, 'r' }, /* F */ + { "update", no_argument, 0, 'u' }, /* F */ + { "list", no_argument, 0, 't' }, /* F */ + { "verbose", no_argument, 0, 'v' }, + { "interactive", no_argument, 0, 'w' }, + { "confirmation", no_argument, 0, 'w' }, + { "extract", no_argument, 0, 'x' }, /* F */ + { "get", no_argument, 0, 'x' }, /* F */ + { "gzip", no_argument, 0, 'z' }, + { "gunzip", no_argument, 0, 'z' }, + { "read-full-blocks", no_argument, 0, 'B' }, + { "directory", required_argument, 0, 'C' }, + { "xz", no_argument, 0, 'J' }, + { "to-stdout", no_argument, 0, 'O' }, + { "absolute-paths", no_argument, 0, 'P' }, + { "sparse", no_argument, 0, 'S' }, + { "files-from", required_argument, 0, 'T' }, + { "summary", no_argument, 0, 'V' }, + { "stats", no_argument, 0, 'V' }, + { "exclude-from", required_argument, 0, 'X' }, + { "compress", no_argument, 0, 'Z' }, + { "uncompress", no_argument, 0, 'Z' }, + { "strict", no_argument, 0, + OPT_STRICT }, + { "atime-preserve", no_argument, 0, + OPT_ATIME_PRESERVE }, + { "unlink", no_argument, 0, + OPT_UNLINK }, + { "use-compress-program", required_argument, 0, + OPT_USE_COMPRESS_PROGRAM }, + { "force-local", no_argument, 0, + OPT_FORCE_LOCAL }, + { "insecure", no_argument, 0, + 
OPT_INSECURE }, + { "exclude", required_argument, 0, + OPT_EXCLUDE }, + { "no-recursion", no_argument, 0, + OPT_NORECURSE }, +#if !HAVE_NBTOOL_CONFIG_H + { "chroot", no_argument, 0, + OPT_CHROOT }, +#endif + { "timestamp", required_argument, 0, + OPT_TIMESTAMP }, +#if 0 /* Not implemented */ + { "catenate", no_argument, 0, 'A' }, /* F */ + { "concatenate", no_argument, 0, 'A' }, /* F */ + { "diff", no_argument, 0, 'd' }, /* F */ + { "compare", no_argument, 0, 'd' }, /* F */ + { "checkpoint", no_argument, 0, + OPT_CHECKPOINT }, + { "help", no_argument, 0, + OPT_HELP }, + { "info-script", required_argument, 0, 'F' }, + { "new-volume-script", required_argument, 0, 'F' }, + { "incremental", no_argument, 0, 'G' }, + { "listed-incremental", required_argument, 0, 'g' }, + { "ignore-zeros", no_argument, 0, 'i' }, + { "ignore-failed-read", no_argument, 0, + OPT_IGNORE_FAILED_READ }, + { "starting-file", no_argument, 0, 'K' }, + { "tape-length", required_argument, 0, 'L' }, + { "multi-volume", no_argument, 0, 'M' }, + { "after-date", required_argument, 0, 'N' }, + { "newer", required_argument, 0, 'N' }, + { "record-number", no_argument, 0, 'R' }, + { "remove-files", no_argument, 0, + OPT_REMOVE_FILES }, + { "same-order", no_argument, 0, 's' }, + { "preserve-order", no_argument, 0, 's' }, + { "null", no_argument, 0, + OPT_NULL }, + { "totals", no_argument, 0, + OPT_TOTALS }, + { "volume-name", required_argument, 0, 'V' }, /* XXX */ + { "label", required_argument, 0, 'V' }, /* XXX */ + { "version", no_argument, 0, + OPT_VERSION }, + { "verify", no_argument, 0, 'W' }, + { "block-compress", no_argument, 0, + OPT_BLOCK_COMPRESS }, +#endif + { 0, 0, 0, 0 }, +}; + +static void +tar_set_action(int op) +{ + if (act != ERROR && act != op) + tar_usage(); + act = op; +} + +static void +tar_options(int argc, char **argv) +{ + int c; + int fstdin = 0; + int Oflag = 0; + int nincfiles = 0; + int incfiles_max = 0; + struct incfile { + char *file; + char *dir; + }; + struct incfile *incfiles 
= NULL; + + /* + * Set default values. + */ + rmleadslash = 1; + is_gnutar = 1; + + /* + * process option flags + */ + while ((c = getoldopt(argc, argv, + "+b:cef:hjklmopqrs:tuvwxzBC:HI:JOPST:X:Z014578", + tar_longopts, NULL)) + != -1) { + switch(c) { + case 'b': + /* + * specify blocksize in 512-byte blocks + */ + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + tty_warn(1, "Invalid block size %s", optarg); + tar_usage(); + } + wrblksz *= 512; /* XXX - check for int oflow */ + break; + case 'c': + /* + * create an archive + */ + tar_set_action(ARCHIVE); + break; + case 'e': + /* + * stop after first error + */ + maxflt = 0; + break; + case 'f': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + fstdin = 1; + arcname = NULL; + break; + } + fstdin = 0; + arcname = optarg; + break; + case 'h': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'j': + /* + * pass through bzip2. not a standard option + */ + gzip_program = BZIP2_CMD; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + break; + case 'l': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * This option does several things based on whether + * this is a create or extract operation. + */ + if (act == ARCHIVE) { + /* GNU tar: write V7 format archives. */ + Oflag = 1; + /* 4.2BSD: don't add directory entries. */ + if (opt_add("write_opt=nodir") < 0) + tar_usage(); + + } else { + /* SUS: don't preserve owner/group. 
*/ + pids = 0; + nopids = 1; + } + break; + case 'p': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + if (!nopids) + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'q': + /* + * select first match for a pattern only + */ + nflag = 1; + break; + case 'r': + case 'u': + /* + * append to the archive + */ + tar_set_action(APPND); + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + tar_usage(); + break; + } + break; + case 't': + /* + * list contents of the tape + */ + tar_set_action(LIST); + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + break; + case 'w': + /* + * interactive file rename + */ + iflag = 1; + break; + case 'x': + /* + * extract an archive, preserving mode, + * and mtime if possible. + */ + tar_set_action(EXTRACT); + pmtime = 1; + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * Nothing to do here, this is pax default + */ + break; + case 'C': + havechd++; + chdname = optarg; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + break; + case 'I': + case 'T': + if (++nincfiles > incfiles_max) { + incfiles_max = nincfiles + 3; + incfiles = realloc(incfiles, + sizeof(*incfiles) * incfiles_max); + if (incfiles == NULL) { + tty_warn(0, "Unable to allocate space " + "for option list"); + exit(1); + } + } + incfiles[nincfiles - 1].file = optarg; + incfiles[nincfiles - 1].dir = chdname; + break; + case 'J': + gzip_program = XZ_CMD; + break; + case 'O': + Oflag = 1; + break; + case 'P': + /* + * do not remove leading '/' from pathnames + */ + rmleadslash = 0; + Aflag = 1; + break; + case 'S': + /* do nothing; we already generate sparse files */ + break; + case 'V': + /* + * semi-verbose operation mode (no listing) + */ + Vflag = 1; + break; + case 'X': + /* + * GNU tar compat: exclude the files listed in optarg + */ + if 
(tar_gnutar_X_compat(optarg) != 0) + tar_usage(); + break; + case 'Z': + /* + * use compress. + */ + gzip_program = COMPRESS_CMD; + break; + case '0': + arcname = DEV_0; + break; + case '1': + arcname = DEV_1; + break; + case '4': + arcname = DEV_4; + break; + case '5': + arcname = DEV_5; + break; + case '7': + arcname = DEV_7; + break; + case '8': + arcname = DEV_8; + break; + case OPT_ATIME_PRESERVE: + patime = 1; + break; + case OPT_UNLINK: + /* Just ignore -- we always unlink first. */ + break; + case OPT_USE_COMPRESS_PROGRAM: + gzip_program = optarg; + break; + case OPT_FORCE_LOCAL: + forcelocal = 1; + break; + case OPT_INSECURE: + secure = 0; + break; + case OPT_STRICT: + /* disable gnu extensions */ + is_gnutar = 0; + break; + case OPT_EXCLUDE: + if (tar_gnutar_minus_minus_exclude(optarg) != 0) + tar_usage(); + break; + case OPT_NORECURSE: + dflag = 1; + break; +#if !HAVE_NBTOOL_CONFIG_H + case OPT_CHROOT: + do_chroot = 1; + break; +#endif +#ifndef SMALL + case OPT_TIMESTAMP: + if (set_tstamp(optarg, &tst) == -1) { + tty_warn(1, "Invalid timestamp `%s'", optarg); + tar_usage(); + } + break; +#endif + default: + tar_usage(); + break; + } + } + argc -= optind; + argv += optind; + + /* Tar requires an action. */ + if (act == ERROR) + tar_usage(); + + /* Traditional tar behaviour (pax uses stderr unless in list mode) */ + if (fstdin == 1 && act == ARCHIVE) + listf = stderr; + else + listf = stdout; + + /* Traditional tar behaviour (pax wants to read file list from stdin) */ + if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0) + exit(0); + /* + * if we are writing (ARCHIVE) specify tar, otherwise run like pax + * (unless -o specified) + */ + if (act == ARCHIVE || act == APPND) + frmt = &(fsub[Oflag ? 
F_TAR : F_USTAR]); + else if (Oflag) { + if (act == EXTRACT) + to_stdout = 1; + else { + tty_warn(1, "The -O/-o options are only valid when " + "writing or extracting an archive"); + tar_usage(); + } + } + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + default: + { + int sawpat = 0; + int dirisnext = 0; + char *file, *dir = NULL; + int mustfreedir = 0; + + while (nincfiles || *argv != NULL) { + /* + * If we queued up any include files, + * pull them in now. Otherwise, check + * for -I and -C positional flags. + * Anything else must be a file to + * extract. + */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + mustfreedir = 0; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = chdname; + mustfreedir = 0; + } else { + file = NULL; + dir = NULL; + mustfreedir = 0; + } + if (file != NULL) { + FILE *fp; + char *str; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + tty_warn(1, "Unable to open file '%s' for read", file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (dirisnext) { + if (dir && mustfreedir) + free(dir); + dir = str; + mustfreedir = 1; + dirisnext = 0; + continue; + } + if (strcmp(str, "-C") == 0) { + havechd++; + dirisnext = 1; + free(str); + continue; + } + if (strncmp(str, "-C ", 3) == 0) { + havechd++; + if (dir && mustfreedir) + free(dir); + dir = strdup(str + 3); + mustfreedir = 1; + free(str); + continue; + } + if (pat_add(str, dir, NOGLOB_MTCH) < 0) + tar_usage(); + sawpat = 1; + } + /* Bomb if given -C w/out a dir. 
*/ + if (dirisnext) + tar_usage(); + if (dir && mustfreedir) + free(dir); + if (strcmp(file, "-") != 0) + fclose(fp); + if (get_line_error) { + tty_warn(1, "Problem with file '%s'", file); + tar_usage(); + } + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + chdname = *argv++; + havechd++; + } else if (pat_add(*argv++, chdname, 0) < 0) + tar_usage(); + else + sawpat = 1; + } + /* + * if patterns were added, we are doing chdir() + * on a file-by-file basis, else, just one + * global chdir (if any) after opening input. + */ + if (sawpat > 0) + chdname = NULL; + } + break; + case ARCHIVE: + case APPND: + if (chdname != NULL) { /* initial chdir() */ + if (ftree_add(chdname, 1) < 0) + tar_usage(); + } + + while (nincfiles || *argv != NULL) { + char *file, *dir; + + /* + * If we queued up any include files, pull them in + * now. Otherwise, check for -I and -C positional + * flags. Anything else must be a file to include + * in the archive. + */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = NULL; + } else { + file = NULL; + dir = NULL; + } + if (file != NULL) { + FILE *fp; + char *str; + int dirisnext = 0; + + /* Set directory if needed */ + if (dir) { + if (ftree_add(dir, 1) < 0) + tar_usage(); + } + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + tty_warn(1, "Unable to open file '%s' for read", file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (dirisnext) { + if (ftree_add(str, 1) < 0) + tar_usage(); + dirisnext = 0; + continue; + } + if (strcmp(str, "-C") == 0) { + dirisnext = 1; + continue; + } + if (strncmp(str, "-C ", 3) == 0) { + if (ftree_add(str + 3, 1) < 0) + tar_usage(); + continue; + } + if (ftree_add(str, 0) < 0) + tar_usage(); + } + /* Bomb if given -C w/out a dir. 
*/ + if (dirisnext) + tar_usage(); + if (strcmp(file, "-") != 0) + fclose(fp); + if (get_line_error) { + tty_warn(1, "Problem with file '%s'", + file); + tar_usage(); + } + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + if (ftree_add(*argv++, 1) < 0) + tar_usage(); + } else if (ftree_add(*argv++, 0) < 0) + tar_usage(); + } + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } + if (!fstdin && ((arcname == NULL) || (*arcname == '\0'))) { + arcname = getenv("TAPE"); + if ((arcname == NULL) || (*arcname == '\0')) + arcname = _PATH_DEFTAPE; + } +} + +int +mkpath(char *path) +{ + char *slash; + int done = 0; + + slash = path; + + while (!done) { + slash += strspn(slash, "/"); + slash += strcspn(slash, "/"); + + done = (*slash == '\0'); + *slash = '\0'; + + if (domkdir(path, 0777) == -1) + goto out; + + if (!done) + *slash = '/'; + } + + return 0; +out: + /* Can't create or or not a directory */ + syswarn(1, errno, "Cannot create directory `%s'", path); + return -1; +} + + +#ifndef NO_CPIO +struct option cpio_longopts[] = { + { "reset-access-time", no_argument, 0, 'a' }, + { "make-directories", no_argument, 0, 'd' }, + { "nonmatching", no_argument, 0, 'f' }, + { "extract", no_argument, 0, 'i' }, + { "link", no_argument, 0, 'l' }, + { "preserve-modification-time", no_argument, 0, 'm' }, + { "create", no_argument, 0, 'o' }, + { "pass-through", no_argument, 0, 'p' }, + { "rename", no_argument, 0, 'r' }, + { "list", no_argument, 0, 't' }, + { "unconditional", no_argument, 0, 'u' }, + { "verbose", no_argument, 0, 'v' }, + { "append", no_argument, 0, 'A' }, + { "pattern-file", required_argument, 0, 'E' }, + { "file", required_argument, 0, 'F' }, + { "force-local", no_argument, 0, + OPT_FORCE_LOCAL }, + { "format", required_argument, 0, 'H' }, + { "dereference", no_argument, 0, 'L' }, + { "swap-halfwords", no_argument, 0, 'S' }, + { "summary", no_argument, 0, 'V' }, + { "stats", no_argument, 0, 'V' }, + { 
"insecure", no_argument, 0, + OPT_INSECURE }, + { "sparse", no_argument, 0, + OPT_SPARSE }, + { "xz", no_argument, 0, + OPT_XZ }, + +#ifdef notyet +/* Not implemented */ + { "null", no_argument, 0, '0' }, + { "swap", no_argument, 0, 'b' }, + { "numeric-uid-gid", no_argument, 0, 'n' }, + { "swap-bytes", no_argument, 0, 's' }, + { "message", required_argument, 0, 'M' }, + { "owner", required_argument, 0 'R' }, + { "dot", no_argument, 0, 'V' }, /* xxx */ + { "block-size", required_argument, 0, + OPT_BLOCK_SIZE }, + { "no-absolute-pathnames", no_argument, 0, + OPT_NO_ABSOLUTE_PATHNAMES }, + { "no-preserve-owner", no_argument, 0, + OPT_NO_PRESERVE_OWNER }, + { "only-verify-crc", no_argument, 0, + OPT_ONLY_VERIFY_CRC }, + { "rsh-command", required_argument, 0, + OPT_RSH_COMMAND }, + { "version", no_argument, 0, + OPT_VERSION }, +#endif + { 0, 0, 0, 0 }, +}; + +static void +cpio_set_action(int op) +{ + if ((act == APPND && op == ARCHIVE) || (act == ARCHIVE && op == APPND)) + act = APPND; + else if (act == EXTRACT && op == LIST) + act = op; + else if (act != ERROR && act != op) + cpio_usage(); + else + act = op; +} + +/* + * cpio_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. 
If not, complain and exit + */ + +static void +cpio_options(int argc, char **argv) +{ + FSUB tmp; + u_int64_t flg = 0; + u_int64_t bflg = 0; + int c; + size_t i; + FILE *fp; + char *str; + + uflag = 1; + kflag = 1; + pids = 1; + pmode = 1; + pmtime = 0; + arcname = NULL; + dflag = 1; + nodirs = 1; + /* + * process option flags + */ + while ((c = getoldopt(argc, argv, + "+abcdfiklmoprstuvzABC:E:F:H:I:LM:O:R:SVZ6", + cpio_longopts, NULL)) != -1) { + switch(c) { + case 'a': + /* + * preserve access time on filesystem nodes we read + */ + tflag = 1; + flg |= TF; + break; +#ifdef notyet + case 'b': + /* + * swap bytes and half-words when reading data + */ + break; +#endif + case 'c': + /* + * ASCII cpio header + */ + frmt = &fsub[F_SV4CPIO]; + break; + case 'd': + /* + * create directories as needed + * pax does this by default .. + */ + nodirs = 0; + break; + case 'f': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'i': + /* + * read the archive + */ + cpio_set_action(EXTRACT); + flg |= RF; + break; +#ifdef notyet + case 'k': + break; +#endif + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'm': + /* + * preserve mtime + */ + flg |= PF; + pmtime = 1; + break; + case 'o': + /* + * write an archive + */ + cpio_set_action(ARCHIVE); + frmt = &(fsub[F_SV4CRC]); + flg |= WF; + break; + case 'p': + /* + * cpio -p is like pax -rw + */ + cpio_set_action(COPY); + flg |= RF | WF; + break; + case 'r': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; +#ifdef notyet + case 's': + /* + * swap bytes after reading data + */ + break; +#endif + case 't': + /* + * list contents of archive + */ + cpio_set_action(LIST); + listf = stdout; + flg &= ~RF; + break; + case 'u': + /* + * don't ignore those older files + */ + uflag = 0; + kflag = 0; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'z': + /* + * use 
gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'A': + /* + * append to an archive + */ + cpio_set_action(APPND); + flg |= AF; + break; + case 'B': + /* + * set blocksize to 5120 + */ + blksz = 5120; + break; + case 'C': + /* + * specify blocksize + */ + if ((blksz = (int)str_offt(optarg)) <= 0) { + tty_warn(1, "Invalid block size %s", optarg); + cpio_usage(); + } + break; + case 'E': + /* + * file with patterns to extract or list + */ + if ((fp = fopen(optarg, "r")) == NULL) { + tty_warn(1, "Unable to open file '%s' for read", + optarg); + cpio_usage(); + } + while ((str = get_line(fp)) != NULL) { + pat_add(str, NULL, 0); + } + fclose(fp); + if (get_line_error) { + tty_warn(1, "Problem with file '%s'", optarg); + cpio_usage(); + } + break; + case 'H': + /* + * specify an archive format on write + */ + tmp.name = optarg; + frmt = (FSUB *)bsearch((void *)&tmp, (void *)fsub, + sizeof(fsub)/sizeof(FSUB), sizeof(FSUB), c_frmt); + if (frmt != NULL) { + flg |= XF; + break; + } + tty_warn(1, "Unknown -H format: %s", optarg); + (void)fputs("cpio: Known -H formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + (void)fprintf(stderr, " %s", fsub[i].name); + (void)fputs("\n\n", stderr); + cpio_usage(); + break; + case 'F': + case 'I': + case 'O': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + arcname = NULL; + break; + } + arcname = optarg; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; +#ifdef notyet + case 'M': + arg = optarg; + break; + case 'R': + arg = optarg; + break; +#endif + case 'S': + /* + * swap halfwords after reading data + */ + cpio_swp_head = 1; + break; +#ifdef notyet + case 'V': /* print a '.' for each file processed */ + break; +#endif + case 'V': + /* + * semi-verbose operation mode (no listing) + */ + Vflag = 1; + flg |= VF; + break; + case 'Z': + /* + * use compress. 
Non standard option. + */ + gzip_program = COMPRESS_CMD; + break; + case '6': + /* + * process Version 6 cpio format + */ + frmt = &(fsub[F_BCPIO]); + break; + case OPT_FORCE_LOCAL: + forcelocal = 1; + break; + case OPT_INSECURE: + secure = 0; + break; + case OPT_SPARSE: + /* do nothing; we already generate sparse files */ + break; + case OPT_XZ: + gzip_program = XZ_CMD; + break; + default: + cpio_usage(); + break; + } + } + + /* + * figure out the operation mode of cpio. check that we have not been + * given a bogus set of flags for the operation mode. + */ + if (ISLIST(flg)) { + act = LIST; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + cpio_usage(); + if (bflg) { + cpio_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. + */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[F_BCPIO]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind], NULL, 0) < 0) + cpio_usage(); + break; + case COPY: + if (optind >= argc) { + tty_warn(0, "Destination directory was not supplied"); + cpio_usage(); + } + --argc; + dirptr = argv[argc]; + /* FALLTHROUGH */ + case ARCHIVE: + case APPND: + if (argc != optind) { + for (; optind < argc; optind++) + if (ftree_add(argv[optind], 0) < 0) + cpio_usage(); + break; + } + /* + * no read errors allowed on updates/append operation! 
+ */ + maxflt = 0; + while ((str = get_line(stdin)) != NULL) { + ftree_add(str, 0); + } + if (get_line_error) { + tty_warn(1, "Problem while reading stdin"); + cpio_usage(); + } + break; + default: + cpio_usage(); + break; + } +} +#endif + +/* + * printflg() + * print out those invalid flag sets found to the user + */ + +static void +printflg(unsigned int flg) +{ + int nxt; + + (void)fprintf(stderr,"%s: Invalid combination of options:", argv0); + while ((nxt = ffs(flg)) != 0) { + flg &= ~(1 << (nxt - 1)); + (void)fprintf(stderr, " -%c", flgch[nxt - 1]); + } + (void)putc('\n', stderr); +} + +/* + * c_frmt() + * comparison routine used by bsearch to find the format specified + * by the user + */ + +static int +c_frmt(const void *a, const void *b) +{ + return strcmp(((const FSUB *)a)->name, ((const FSUB *)b)->name); +} + +/* + * opt_next() + * called by format specific options routines to get each format specific + * flag and value specified with -o + * Return: + * pointer to next OPLIST entry or NULL (end of list). + */ + +OPLIST * +opt_next(void) +{ + OPLIST *opt; + + if ((opt = ophead) != NULL) + ophead = ophead->fow; + return opt; +} + +/* + * bad_opt() + * generic routine used to complain about a format specific options + * when the format does not support options. + */ + +int +bad_opt(void) +{ + OPLIST *opt; + + if (ophead == NULL) + return 0; + /* + * print all we were given + */ + tty_warn(1," These format options are not supported for %s", + frmt->name); + while ((opt = opt_next()) != NULL) + (void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value); + if (strcmp(NM_TAR, argv0) == 0) + tar_usage(); +#ifndef NO_CPIO + else if (strcmp(NM_CPIO, argv0) == 0) + cpio_usage(); +#endif + else + pax_usage(); + return 0; +} + +/* + * opt_add() + * breaks the value supplied to -o into a option name and value. options + * are given to -o in the form -o name-value,name=value + * multiple -o may be specified. 
+ * Return: + * 0 if format in name=value format, -1 if -o is passed junk + */ + +int +opt_add(const char *str) +{ + OPLIST *opt; + char *frpt; + char *pt; + char *endpt; + char *dstr; + + if ((str == NULL) || (*str == '\0')) { + tty_warn(0, "Invalid option name"); + return -1; + } + if ((dstr = strdup(str)) == NULL) { + tty_warn(0, "Unable to allocate space for option list"); + return -1; + } + frpt = endpt = dstr; + + /* + * break into name and values pieces and stuff each one into a + * OPLIST structure. When we know the format, the format specific + * option function will go through this list + */ + while ((frpt != NULL) && (*frpt != '\0')) { + if ((endpt = strchr(frpt, ',')) != NULL) + *endpt = '\0'; + if ((pt = strchr(frpt, '=')) == NULL) { + tty_warn(0, "Invalid options format"); + free(dstr); + return -1; + } + if ((opt = (OPLIST *)malloc(sizeof(OPLIST))) == NULL) { + tty_warn(0, "Unable to allocate space for option list"); + free(dstr); + return -1; + } + *pt++ = '\0'; + opt->name = frpt; + opt->value = pt; + opt->fow = NULL; + if (endpt != NULL) + frpt = endpt + 1; + else + frpt = NULL; + if (ophead == NULL) { + optail = ophead = opt; + continue; + } + optail->fow = opt; + optail = opt; + } + return 0; +} + +/* + * str_offt() + * Convert an expression of the following forms to an off_t > 0. + * 1) A positive decimal number. + * 2) A positive decimal number followed by a b (mult by 512). + * 3) A positive decimal number followed by a k (mult by 1024). + * 4) A positive decimal number followed by a m (mult by 512). + * 5) A positive decimal number followed by a w (mult by sizeof int) + * 6) Two or more positive decimal numbers (with/without k,b or w). + * separated by x (also * for backwards compatibility), specifying + * the product of the indicated values. + * Return: + * 0 for an error, a positive value o.w. 
+ */ + +static off_t +str_offt(char *val) +{ + char *expr; + off_t num, t; + + num = STRTOOFFT(val, &expr, 0); + if ((num == OFFT_MAX) || (num <= 0) || (expr == val)) + return 0; + + switch(*expr) { + case 'b': + t = num; + num *= 512; + if (t > num) + return 0; + ++expr; + break; + case 'k': + t = num; + num *= 1024; + if (t > num) + return 0; + ++expr; + break; + case 'm': + t = num; + num *= 1048576; + if (t > num) + return 0; + ++expr; + break; + case 'w': + t = num; + num *= sizeof(int); + if (t > num) + return 0; + ++expr; + break; + } + + switch(*expr) { + case '\0': + break; + case '*': + case 'x': + t = num; + num *= str_offt(expr + 1); + if (t > num) + return 0; + break; + default: + return 0; + } + return num; +} + +static char * +get_line(FILE *f) +{ + char *name, *temp; + size_t len; + + name = fgetln(f, &len); + if (!name) { + get_line_error = ferror(f) ? GETLINE_FILE_CORRUPT : 0; + return 0; + } + if (name[len-1] != '\n') + len++; + temp = malloc(len); + if (!temp) { + get_line_error = GETLINE_OUT_OF_MEM; + return 0; + } + memcpy(temp, name, len-1); + temp[len-1] = 0; + return temp; +} + +#ifndef SMALL +/* + * set_tstamp() + * Use a specific timestamp for all individual files created in the + * archive + */ +static int +set_tstamp(const char *b, struct stat *st) +{ + time_t when; + char *eb; + long long l; + + if (stat(b, st) != -1) + return 0; + +#ifndef HAVE_NBTOOL_CONFIG_H + errno = 0; + if ((when = parsedate(b, NULL, NULL)) == -1 && errno != 0) +#endif + { + errno = 0; + l = strtoll(b, &eb, 0); + if (b == eb || *eb || errno) + return -1; + when = (time_t)l; + } + + st->st_ino = 1; +#if HAVE_STRUCT_STAT_BIRTHTIME + st->st_birthtime = +#endif + st->st_mtime = st->st_ctime = st->st_atime = when; + return 0; +} +#endif + +/* + * no_op() + * for those option functions where the archive format has nothing to do. 
+ * Return: + * 0 + */ + +static int +no_op(void) +{ + return 0; +} + +/* + * pax_usage() + * print the usage summary to the user + */ + +static void +pax_usage(void) +{ + fprintf(stderr, +"usage: pax [-0cdjnvzVO] [-E limit] [-f archive] [-N dbdir] [-s replstr] ...\n" +" [-U user] ... [-G group] ... [-T [from_date][,to_date]] ...\n" +" [pattern ...]\n"); + fprintf(stderr, +" pax -r [-cdijknuvzADOVYZ] [-E limit] [-f archive] [-N dbdir]\n" +" [-o options] ... [-p string] ... [-s replstr] ... [-U user] ...\n" +" [-G group] ... [-T [from_date][,to_date]] ... [pattern ...]\n"); + fprintf(stderr, +" pax -w [-dijtuvzAHLMOPVX] [-b blocksize] [[-a] [-f archive]] [-x format]\n" +" [-B bytes] [-N dbdir] [-o options] ... [-s replstr] ...\n" +" [-U user] ... [-G group] ...\n" +" [-T [from_date][,to_date][/[c][m]]] ... [file ...]\n"); + fprintf(stderr, +" pax -r -w [-dijklntuvzADHLMOPVXYZ] [-N dbdir] [-p string] ...\n" +" [-s replstr] ... [-U user] ... [-G group] ...\n" +" [-T [from_date][,to_date][/[c][m]]] ... [file ...] directory\n"); + exit(1); + /* NOTREACHED */ +} + +/* + * tar_usage() + * print the usage summary to the user + */ + +static void +tar_usage(void) +{ + (void)fputs("usage: tar [-]{crtux}[-befhjklmopqvwzHJOPSXZ014578] " + "[archive] [blocksize]\n" + " [-C directory] [-T file] [-s replstr] " + "[file ...]\n", stderr); + exit(1); + /* NOTREACHED */ +} + +#ifndef NO_CPIO +/* + * cpio_usage() + * print the usage summary to the user + */ + +static void +cpio_usage(void) +{ + + (void)fputs("usage: cpio -o [-aABcLvzZ] [-C bytes] [-F archive] " + "[-H format] [-O archive]\n" + " < name-list [> archive]\n" + " cpio -i [-bBcdfmrsStuvzZ6] [-C bytes] [-E file] " + "[-F archive] [-H format] \n" + " [-I archive] " + "[pattern ...] 
[< archive]\n" + " cpio -p [-adlLmuv] destination-directory " + "< name-list\n", stderr); + exit(1); + /* NOTREACHED */ +} +#endif diff --git a/bin/pax/options.h b/bin/pax/options.h new file mode 100644 index 0000000..6517174 --- /dev/null +++ b/bin/pax/options.h @@ -0,0 +1,116 @@ +/* $NetBSD: options.h,v 1.11 2007/04/23 18:40:22 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)options.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * argv[0] names. Used for tar and cpio emulation + */ + +#define NM_TAR "tar" +#define NM_CPIO "cpio" +#define NM_PAX "pax" + +/* special value for -E */ +#define none "none" + +/* + * Constants used to specify the legal sets of flags in pax. For each major + * operation mode of pax, a set of illegal flags is defined. If any one of + * those illegal flags are found set, we scream and exit + */ + +/* + * flags (one for each option). 
+ */ +#define AF 0x000000001ULL +#define BF 0x000000002ULL +#define CF 0x000000004ULL +#define DF 0x000000008ULL +#define FF 0x000000010ULL +#define IF 0x000000020ULL +#define KF 0x000000040ULL +#define LF 0x000000080ULL +#define NF 0x000000100ULL +#define OF 0x000000200ULL +#define PF 0x000000400ULL +#define RF 0x000000800ULL +#define SF 0x000001000ULL +#define TF 0x000002000ULL +#define UF 0x000004000ULL +#define VF 0x000008000ULL +#define WF 0x000010000ULL +#define XF 0x000020000ULL +#define CAF 0x000040000ULL /* nonstandard extension */ +#define CBF 0x000080000ULL /* nonstandard extension */ +#define CDF 0x000100000ULL /* nonstandard extension */ +#define CEF 0x000200000ULL /* nonstandard extension */ +#define CGF 0x000400000ULL /* nonstandard extension */ +#define CHF 0x000800000ULL /* nonstandard extension */ +#define CLF 0x001000000ULL /* nonstandard extension */ +#define CMF 0x002000000ULL /* nonstandard extension */ +#define CPF 0x004000000ULL /* nonstandard extension */ +#define CTF 0x008000000ULL /* nonstandard extension */ +#define CUF 0x010000000ULL /* nonstandard extension */ +#define VSF 0x020000000ULL /* non-standard */ +#define CXF 0x040000000ULL +#define CYF 0x080000000ULL /* nonstandard extension */ +#define CZF 0x100000000ULL /* nonstandard extension */ + +/* + * ascii string indexed by bit position above (alter the above and you must + * alter this string) used to tell the user what flags caused us to complain + */ +#define FLGCH "abcdfiklnoprstuvwxABDEGHLMPTUVXYZ" + +/* + * legal pax operation bit patterns + */ + +#define ISLIST(x) (((x) & (RF|WF)) == 0) +#define ISEXTRACT(x) (((x) & (RF|WF)) == RF) +#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF) +#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF)) +#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF)) +#define ISWRITE(x) (((x) & (RF|WF)) == WF) + +/* + * Illegal option flag subsets based on pax operation + */ + +#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CMF|CPF|CXF) +#define BDARCH 
(CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF) +#define BDCOPY (AF|BF|FF|OF|XF|CAF|CBF|CEF) +#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CMF|CPF|CXF|CYF|CZF) diff --git a/bin/pax/pat_rep.c b/bin/pax/pat_rep.c new file mode 100644 index 0000000..7dbcda7 --- /dev/null +++ b/bin/pax/pat_rep.c @@ -0,0 +1,1139 @@ +/* $NetBSD: pat_rep.c,v 1.30 2018/06/13 15:14:40 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: pat_rep.c,v 1.30 2018/06/13 15:14:40 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "pat_rep.h" +#include "extern.h" + +/* + * routines to handle pattern matching, name modification (regular expression + * substitution and interactive renames), and destination name modification for + * copy (-rw). Both file name and link names are adjusted as required in these + * routines. 
+ */ + +#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ +static PATTERN *pathead = NULL; /* file pattern match list head */ +static PATTERN *pattail = NULL; /* file pattern match list tail */ +static REPLACE *rephead = NULL; /* replacement string list head */ +static REPLACE *reptail = NULL; /* replacement string list tail */ + +static int rep_name(char *, size_t, int *, int); +static int tty_rename(ARCHD *); +static int fix_path(char *, int *, char *, int); +static int fn_match(char *, char *, char **, int); +static char * range_match(char *, int); +static int checkdotdot(const char *); +static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); + +/* + * rep_add() + * parses the -s replacement string; compiles the regular expression + * and stores the compiled value and its replacement string together in + * replacement string list. Input to this function is of the form: + * /old/new/pg + * The first char in the string specifies the delimiter used by this + * replacement string. "Old" is a regular expression in "ed" format which + * is compiled by regcomp() and is applied to filenames. "new" is the + * substitution string; p and g are options flags for printing and global + * replacement (over the single filename) + * Return: + * 0 if a proper replacement string and regular expression was added to + * the list of replacement patterns; -1 otherwise. + */ + +int +rep_add(char *str) +{ + char *pt1; + char *pt2; + REPLACE *rep; + int res; + char rebuf[BUFSIZ]; + + /* + * throw out the bad parameters + */ + if ((str == NULL) || (*str == '\0')) { + tty_warn(1, "Empty replacement string"); + return -1; + } + + /* + * first character in the string specifies what the delimiter is for + * this expression. 
+ */ + for (pt1 = str+1; *pt1; pt1++) { + if (*pt1 == '\\') { + pt1++; + continue; + } + if (*pt1 == *str) + break; + } + if (*pt1 == 0) { + tty_warn(1, "Invalid replacement string %s", str); + return -1; + } + + /* + * allocate space for the node that handles this replacement pattern + * and split out the regular expression and try to compile it + */ + if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { + tty_warn(1, "Unable to allocate memory for replacement string"); + return -1; + } + + *pt1 = '\0'; + if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { + regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); + tty_warn(1, "%s while compiling regular expression %s", rebuf, + str); + (void)free((char *)rep); + return -1; + } + + /* + * put the delimiter back in case we need an error message and + * locate the delimiter at the end of the replacement string + * we then point the node at the new substitution string + */ + *pt1++ = *str; + for (pt2 = pt1; *pt2; pt2++) { + if (*pt2 == '\\') { + pt2++; + continue; + } + if (*pt2 == *str) + break; + } + if (*pt2 == 0) { + regfree(&(rep->rcmp)); + (void)free((char *)rep); + tty_warn(1, "Invalid replacement string %s", str); + return -1; + } + + *pt2 = '\0'; + + /* Make sure to dup replacement, who knows where it came from! 
*/ + if ((rep->nstr = strdup(pt1)) == NULL) { + regfree(&(rep->rcmp)); + (void)free((char *)rep); + tty_warn(1, "Unable to allocate memory for replacement string"); + return -1; + } + + pt1 = pt2++; + rep->flgs = 0; + + /* + * set the options if any + */ + while (*pt2 != '\0') { + switch(*pt2) { + case 'g': + case 'G': + rep->flgs |= GLOB; + break; + case 'p': + case 'P': + rep->flgs |= PRNT; + break; + case 's': + case 'S': + rep->flgs |= SYML; + break; + default: + regfree(&(rep->rcmp)); + (void)free((char *)rep); + *pt1 = *str; + tty_warn(1, "Invalid replacement string option %s", + str); + return -1; + } + ++pt2; + } + + /* + * all done, link it in at the end + */ + rep->fow = NULL; + if (rephead == NULL) { + reptail = rephead = rep; + return 0; + } + reptail->fow = rep; + reptail = rep; + return 0; +} + +/* + * pat_add() + * add a pattern match to the pattern match list. Pattern matches are used + * to select which archive members are extracted. (They appear as + * arguments to pax in the list and read modes). If no patterns are + * supplied to pax, all members in the archive will be selected (and the + * pattern match list is empty). + * + * Return: + * 0 if the pattern was added to the list, -1 otherwise + */ + +int +pat_add(char *str, char *chdn, int flags) +{ + PATTERN *pt; + + /* + * throw out the junk + */ + if ((str == NULL) || (*str == '\0')) { + tty_warn(1, "Empty pattern string"); + return -1; + } + + /* + * allocate space for the pattern and store the pattern. the pattern is + * part of argv so do not bother to copy it, just point at it. 
Add the + * node to the end of the pattern list + */ + if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { + tty_warn(1, "Unable to allocate memory for pattern string"); + return -1; + } + + pt->pstr = str; + pt->pend = NULL; + pt->plen = strlen(str); + pt->fow = NULL; + pt->flgs = flags; + pt->chdname = chdn; + if (pathead == NULL) { + pattail = pathead = pt; + return 0; + } + pattail->fow = pt; + pattail = pt; + return 0; +} + +/* + * pat_chk() + * complain if any the user supplied pattern did not result in a match to + * a selected archive member. + */ + +void +pat_chk(void) +{ + PATTERN *pt; + int wban = 0; + + /* + * walk down the list checking the flags to make sure MTCH was set, + * if not complain + */ + for (pt = pathead; pt != NULL; pt = pt->fow) { + if (pt->flgs & MTCH) + continue; + if (!wban) { + tty_warn(1, "WARNING! These patterns were not matched:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", pt->pstr); + } +} + +/* + * pat_sel() + * the archive member which matches a pattern was selected. Mark the + * pattern as having selected an archive member. arcn->pat points at the + * pattern that was matched. arcn->pat is set in pat_match() + * + * NOTE: When the -c option is used, we are called when there was no match + * by pat_match() (that means we did match before the inverted sense of + * the logic). Now this seems really strange at first, but with -c we + * need to keep track of those patterns that cause an archive member to + * NOT be selected (it found an archive member with a specified pattern) + * Return: + * 0 if the pattern pointed at by arcn->pat was tagged as creating a + * match, -1 otherwise. 
+ */ + +int +pat_sel(ARCHD *arcn) +{ + PATTERN *pt; + PATTERN **ppt; + int len; + + /* + * if no patterns just return + */ + if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) + return 0; + + /* + * when we are NOT limited to a single match per pattern mark the + * pattern and return + */ + if (!nflag) { + pt->flgs |= MTCH; + return 0; + } + + /* + * we reach this point only when we allow a single selected match per + * pattern, if the pattern matches a directory and we do not have -d + * (dflag) we are done with this pattern. We may also be handed a file + * in the subtree of a directory. in that case when we are operating + * with -d, this pattern was already selected and we are done + */ + if (pt->flgs & DIR_MTCH) + return 0; + + if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { + /* + * ok we matched a directory and we are allowing + * subtree matches but because of the -n only its children will + * match. This is tagged as a DIR_MTCH type. + * WATCH IT, the code assumes that pt->pend points + * into arcn->name and arcn->name has not been modified. + * If not we will have a big mess. Yup this is another kludge + */ + + /* + * if this was a prefix match, remove trailing part of path + * so we can copy it. 
Future matches will be exact prefix match + */ + if (pt->pend != NULL) + *pt->pend = '\0'; + + if ((pt->pstr = strdup(arcn->name)) == NULL) { + tty_warn(1, "Pattern select out of memory"); + if (pt->pend != NULL) + *pt->pend = '/'; + pt->pend = NULL; + return -1; + } + + /* + * put the trailing / back in the source string + */ + if (pt->pend != NULL) { + *pt->pend = '/'; + pt->pend = NULL; + } + pt->plen = strlen(pt->pstr); + + /* + * strip off any trailing /, this should really never happen + */ + len = pt->plen - 1; + if (*(pt->pstr + len) == '/') { + *(pt->pstr + len) = '\0'; + pt->plen = len; + } + pt->flgs = DIR_MTCH | MTCH; + arcn->pat = pt; + return 0; + } + + /* + * we are then done with this pattern, so we delete it from the list + * because it can never be used for another match. + * Seems kind of strange to do for a -c, but the pax spec is really + * vague on the interaction of -c, -n, and -d. We assume that when -c + * and the pattern rejects a member (i.e. it matched it) it is done. + * In effect we place the order of the flags as having -c last. + */ + pt = pathead; + ppt = &pathead; + while ((pt != NULL) && (pt != arcn->pat)) { + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt == NULL) { + /* + * should never happen.... + */ + tty_warn(1, "Pattern list inconsistent"); + return -1; + } + *ppt = pt->fow; + (void)free((char *)pt); + arcn->pat = NULL; + return 0; +} + +/* + * pat_match() + * see if this archive member matches any supplied pattern, if a match + * is found, arcn->pat is set to point at the potential pattern. 
Later if + * this archive member is "selected" we process and mark the pattern as + * one which matched a selected archive member (see pat_sel()) + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + */ + +int +pat_match(ARCHD *arcn) +{ + PATTERN *pt; + + arcn->pat = NULL; + + /* + * if there are no more patterns and we have -n (and not -c) we are + * done. otherwise with no patterns to match, matches all + */ + if (pathead == NULL) { + if (nflag && !cflag) + return -1; + return 0; + } + + /* + * have to search down the list one at a time looking for a match. + */ + pt = pathead; + while (pt != NULL) { + /* + * check for a file name match unless we have DIR_MTCH set in + * this pattern then we want a prefix match + */ + if (pt->flgs & DIR_MTCH) { + /* + * this pattern was matched before to a directory + * as we must have -n set for this (but not -d). We can + * only match CHILDREN of that directory so we must use + * an exact prefix match (no wildcards). + */ + if ((arcn->name[pt->plen] == '/') && + (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) + break; + } else if (fn_match(pt->pstr, arcn->name, &pt->pend, + pt->flgs & NOGLOB_MTCH) == 0) + break; + pt = pt->fow; + } + + /* + * return the result, remember that cflag (-c) inverts the sense of a + * match + */ + if (pt == NULL) + return cflag ? 0 : 1; + + /* + * we had a match, now when we invert the sense (-c) we reject this + * member. However we have to tag the pattern a being successful, (in a + * match, not in selecting an archive member) so we call pat_sel() + * here. 
+ */ + arcn->pat = pt; + if (!cflag) + return 0; + + if (pat_sel(arcn) < 0) + return -1; + arcn->pat = NULL; + return 1; +} + +/* + * fn_match() + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + * Note: *pend may be changed to show where the prefix ends. + */ + +static int +fn_match(char *pattern, char *string, char **pend, int noglob) +{ + char c; + char test; + + *pend = NULL; + for (;;) { + switch (c = *pattern++) { + case '\0': + /* + * Ok we found an exact match + */ + if (*string == '\0') + return 0; + + /* + * Check if it is a prefix match + */ + if ((dflag == 1) || (*string != '/')) + return -1; + + /* + * It is a prefix match, remember where the trailing + * / is located + */ + *pend = string; + return 0; + case '?': + if (noglob) + goto regular; + if ((test = *string++) == '\0') + return (-1); + break; + case '*': + if (noglob) + goto regular; + c = *pattern; + /* + * Collapse multiple *'s. + */ + while (c == '*') + c = *++pattern; + + /* + * Optimized hack for pattern with a * at the end + */ + if (c == '\0') + return (0); + + /* + * General case, use recursion. 
+ */ + while ((test = *string) != '\0') { + if (!fn_match(pattern, string, pend, noglob)) + return (0); + ++string; + } + return (-1); + case '[': + if (noglob) + goto regular; + /* + * range match + */ + if (((test = *string++) == '\0') || + ((pattern = range_match(pattern, test)) == NULL)) + return (-1); + break; + case '\\': + default: + regular: + if (c != *string++) + return (-1); + break; + } + } + /* NOTREACHED */ +} + +static char * +range_match(char *pattern, int test) +{ + char c; + char c2; + int negate; + int ok = 0; + + if ((negate = (*pattern == '!')) != 0) + ++pattern; + + while ((c = *pattern++) != ']') { + /* + * Illegal pattern + */ + if (c == '\0') + return (NULL); + + if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && + (c2 != ']')) { + if ((c <= test) && (test <= c2)) + ok = 1; + pattern += 2; + } else if (c == test) + ok = 1; + } + return (ok == negate ? NULL : pattern); +} + +/* + * mod_name() + * modify a selected file name. first attempt to apply replacement string + * expressions, then apply interactive file rename. We apply replacement + * string expressions to both filenames and file links (if we didn't the + * links would point to the wrong place, and we could never be able to + * move an archive that has a file link in it). When we rename files + * interactively, we store that mapping (old name to user input name) so + * if we spot any file links to the old file name in the future, we will + * know exactly how to fix the file link. + * Return: + * 0 continue to process file, 1 skip this file, -1 pax is finished + */ + +int +mod_name(ARCHD *arcn, int flags) +{ + int res = 0; + + if (secure) { + if (checkdotdot(arcn->name)) { + tty_warn(0, "Ignoring file containing `..' (%s)", + arcn->name); + return 1; + } +#ifdef notdef + if (checkdotdot(arcn->ln_name)) { + tty_warn(0, "Ignoring link containing `..' (%s)", + arcn->ln_name); + return 1; + } +#endif + } + + /* + * IMPORTANT: We have a problem. what do we do with symlinks? 
+ * Modifying a hard link name makes sense, as we know the file it + * points at should have been seen already in the archive (and if it + * wasn't seen because of a read error or a bad archive, we lose + * anyway). But there are no such requirements for symlinks. On one + * hand the symlink that refers to a file in the archive will have to + * be modified to so it will still work at its new location in the + * file system. On the other hand a symlink that points elsewhere (and + * should continue to do so) should not be modified. There is clearly + * no perfect solution here. So we handle them like hardlinks. Clearly + * a replacement made by the interactive rename mapping is very likely + * to be correct since it applies to a single file and is an exact + * match. The regular expression replacements are a little harder to + * justify though. We claim that the symlink name is only likely + * to be replaced when it points within the file tree being moved and + * in that case it should be modified. what we really need to do is to + * call an oracle here. :) + */ + if (rephead != NULL) { + flags |= (flags & RENM) ? PRNT : 0; + /* + * we have replacement strings, modify the name and the link + * name if any. + */ + if ((res = rep_name(arcn->name, sizeof(arcn->name), + &(arcn->nlen), flags)) != 0) + return res; + + if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) && + ((res = rep_name(arcn->ln_name, + sizeof(arcn->ln_name), &(arcn->ln_nlen), + flags | (arcn->type == PAX_SLK ? SYML : 0))) != 0)) + return res; + } + + if (iflag) { + /* + * perform interactive file rename, then map the link if any + */ + if ((res = tty_rename(arcn)) != 0) + return res; + if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) + sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); + } + + /* + * Strip off leading '/' if appropriate. + * Currently, this option is only set for the tar format. 
+ */ + if (rmleadslash && arcn->name[0] == '/') { + if (arcn->name[1] == '\0') { + arcn->name[0] = '.'; + } else { + (void)memmove(arcn->name, &arcn->name[1], + strlen(arcn->name)); + arcn->nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + tty_warn(0, "Removing leading / from absolute path names in the archive"); + } + } + if (rmleadslash && arcn->ln_name[0] == '/' && + (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { + if (arcn->ln_name[1] == '\0') { + arcn->ln_name[0] = '.'; + } else { + (void)memmove(arcn->ln_name, &arcn->ln_name[1], + strlen(arcn->ln_name)); + arcn->ln_nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + tty_warn(0, "Removing leading / from absolute path names in the archive"); + } + } + + return res; +} + +/* + * tty_rename() + * Prompt the user for a replacement file name. A "." keeps the old name, + * a empty line skips the file, and an EOF on reading the tty, will cause + * pax to stop processing and exit. Otherwise the file name input, replaces + * the old one. + * Return: + * 0 process this file, 1 skip this file, -1 we need to exit pax + */ + +static int +tty_rename(ARCHD *arcn) +{ + char tmpname[PAXPATHLEN+2]; + int res; + + /* + * prompt user for the replacement name for a file, keep trying until + * we get some reasonable input. Archives may have more than one file + * on them with the same name (from updates etc). We print verbose info + * on the file so the user knows what is up. 
+ */ + tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); + + for (;;) { + ls_tty(arcn); + tty_prnt("Input new name, or a \".\" to keep the old name, "); + tty_prnt("or a \"return\" to skip this file.\n"); + tty_prnt("Input > "); + if (tty_read(tmpname, sizeof(tmpname)) < 0) + return -1; + if (strcmp(tmpname, "..") == 0) { + tty_prnt("Try again, illegal file name: ..\n"); + continue; + } + if (strlen(tmpname) > PAXPATHLEN) { + tty_prnt("Try again, file name too long\n"); + continue; + } + break; + } + + /* + * empty file name, skips this file. a "." leaves it alone + */ + if (tmpname[0] == '\0') { + tty_prnt("Skipping file.\n"); + return 1; + } + if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { + tty_prnt("Processing continues, name unchanged.\n"); + return 0; + } + + /* + * ok the name changed. We may run into links that point at this + * file later. we have to remember where the user sent the file + * in order to repair any links. + */ + tty_prnt("Processing continues, name changed to: %s\n", tmpname); + res = add_name(arcn->name, arcn->nlen, tmpname); + arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); + if (res < 0) + return -1; + return 0; +} + +/* + * set_dest() + * fix up the file name and the link name (if any) so this file will land + * in the destination directory (used during copy() -rw). + * Return: + * 0 if ok, -1 if failure (name too long) + */ + +int +set_dest(ARCHD *arcn, char *dest_dir, int dir_len) +{ + if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) + return -1; + + /* + * It is really hard to deal with symlinks here, we cannot be sure + * if the name they point was moved (or will be moved). It is best to + * leave them alone. 
+ */ + if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) + return 0; + + if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) + return -1; + return 0; +} + +/* + * fix_path + * concatenate dir_name and or_name and store the result in or_name (if + * it fits). This is one ugly function. + * Return: + * 0 if ok, -1 if the final name is too long + */ + +static int +fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) +{ + char *src; + char *dest; + char *start; + int len; + + /* + * we shift the or_name to the right enough to tack in the dir_name + * at the front. We make sure we have enough space for it all before + * we start. since dest always ends in a slash, we skip of or_name + * if it also starts with one. + */ + start = or_name; + src = start + *or_len; + dest = src + dir_len; + if (*start == '/') { + ++start; + --dest; + } + if ((len = dest - or_name) > PAXPATHLEN) { + tty_warn(1, "File name %s/%s, too long", dir_name, start); + return -1; + } + *or_len = len; + + /* + * enough space, shift + */ + while (src >= start) + *dest-- = *src--; + src = dir_name + dir_len - 1; + + /* + * splice in the destination directory name + */ + while (src >= dir_name) + *dest-- = *src--; + + *(or_name + len) = '\0'; + return 0; +} + +/* + * rep_name() + * walk down the list of replacement strings applying each one in order. + * when we find one with a successful substitution, we modify the name + * as specified. if required, we print the results. if the resulting name + * is empty, we will skip this archive member. We use the regexp(3) + * routines (regexp() ought to win a prize as having the most cryptic + * library function manual page). + * --Parameters-- + * name is the file name we are going to apply the regular expressions to + * (and may be modified) + * namelen the size of the name buffer. + * nlen is the length of this name (and is modified to hold the length of + * the final string). 
+ * prnt is a flag that says whether to print the final result. + * Return: + * 0 if substitution was successful, 1 if we are to skip the file (the name + * ended up empty) + */ + +static int +rep_name(char *name, size_t namelen, int *nlen, int flags) +{ + REPLACE *pt; + char *inpt; + char *outpt; + char *endpt; + char *rpt; + int found = 0; + int res; + regmatch_t pm[MAXSUBEXP]; + char nname[PAXPATHLEN+1]; /* final result of all replacements */ + char buf1[PAXPATHLEN+1]; /* where we work on the name */ + + /* + * copy the name into buf1, where we will work on it. We need to keep + * the orig string around so we can print out the result of the final + * replacement. We build up the final result in nname. inpt points at + * the string we apply the regular expression to. prnt is used to + * suppress printing when we handle replacements on the link field + * (the user already saw that substitution go by) + */ + pt = rephead; + (void)strlcpy(buf1, name, sizeof(buf1)); + inpt = buf1; + outpt = nname; + endpt = outpt + PAXPATHLEN; + + /* + * try each replacement string in order + */ + while (pt != NULL) { + do { + if ((flags & SYML) && (pt->flgs & SYML)) + continue; + /* + * check for a successful substitution, if not go to + * the next pattern, or cleanup if we were global + */ + if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) + break; + + /* + * ok we found one. We have three parts, the prefix + * which did not match, the section that did and the + * tail (that also did not match). Copy the prefix to + * the final output buffer (watching to make sure we + * do not create a string too long). + */ + found = 1; + rpt = inpt + pm[0].rm_so; + + while ((inpt < rpt) && (outpt < endpt)) + *outpt++ = *inpt++; + if (outpt == endpt) + break; + + /* + * for the second part (which matched the regular + * expression) apply the substitution using the + * replacement string and place it the prefix in the + * final output. If we have problems, skip it. 
+ */ + if ((res = + resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt) + ) < 0) { + if (flags & PRNT) + tty_warn(1, "Replacement name error %s", + name); + return 1; + } + outpt += res; + + /* + * we set up to look again starting at the first + * character in the tail (of the input string right + * after the last character matched by the regular + * expression (inpt always points at the first char in + * the string to process). If we are not doing a global + * substitution, we will use inpt to copy the tail to + * the final result. Make sure we do not overrun the + * output buffer + */ + inpt += pm[0].rm_eo - pm[0].rm_so; + + if ((outpt == endpt) || (*inpt == '\0')) + break; + + /* + * if the user wants global we keep trying to + * substitute until it fails, then we are done. + */ + } while (pt->flgs & GLOB); + + if (found) + break; + + /* + * a successful substitution did NOT occur, try the next one + */ + pt = pt->fow; + } + + if (found) { + /* + * we had a substitution, copy the last tail piece (if there is + * room) to the final result + */ + while ((outpt < endpt) && (*inpt != '\0')) + *outpt++ = *inpt++; + + *outpt = '\0'; + if ((outpt == endpt) && (*inpt != '\0')) { + if (flags & PRNT) + tty_warn(1,"Replacement name too long %s >> %s", + name, nname); + return 1; + } + + /* + * inform the user of the result if wanted + */ + if ((flags & PRNT) && (pt->flgs & PRNT)) { + if (*nname == '\0') + (void)fprintf(stderr,"%s >> \n", + name); + else + (void)fprintf(stderr,"%s >> %s\n", name, nname); + } + + /* + * if empty inform the caller this file is to be skipped + * otherwise copy the new name over the orig name and return + */ + if (*nname == '\0') + return 1; + if (flags & RENM) + *nlen = strlcpy(name, nname, namelen); + } + return 0; +} + + +/* + * checkdotdot() + * Return true if a component of the name contains a reference to ".." + */ +static int +checkdotdot(const char *name) +{ + const char *p; + /* 1. "..{[/],}" */ + if (name[0] == '.' && name[1] == '.' 
&& + (name[2] == '/' || name[2] == '\0')) + return 1; + + /* 2. "*[/]..[/]*" */ + if (strstr(name, "/../") != NULL) + return 1; + + /* 3. "*[/].." */ + for (p = name; *p; p++) + continue; + if (p - name < 3) + return 0; + if (p[-1] == '.' && p[-2] == '.' && p[-3] == '/') + return 1; + + return 0; +} + + +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. + */ + +static int +resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest, + char *destend) +{ + char *spt; + char *dpt; + char c; + regmatch_t *pmpt; + int len; + int subexcnt; + + spt = src; + dpt = dest; + subexcnt = rp->re_nsub; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + /* + * see if we just have an ordinary replacement character + * or we refer to a subexpression. + */ + if (c == '&') { + pmpt = pm; + } else if ((c == '\\') && (*spt >= '1') && (*spt <= '9')) { + /* + * make sure there is a subexpression as specified + */ + if ((len = *spt++ - '0') > subexcnt) + return -1; + pmpt = pm + len; + } else { + /* + * Ordinary character, just copy it + */ + if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) + c = *spt++; + *dpt++ = c; + continue; + } + + /* + * continue if the subexpression is bogus + */ + if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || + ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + return -1; + strncpy(dpt, txt + pmpt->rm_so, len); + dpt += len; + } + return dpt - dest; +} diff --git a/bin/pax/pat_rep.h b/bin/pax/pat_rep.h new file mode 100644 index 0000000..f2e0cbe --- /dev/null +++ b/bin/pax/pat_rep.h @@ -0,0 +1,51 @@ +/* $NetBSD: pat_rep.h,v 1.7 2008/02/24 20:42:46 joerg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. 
+ * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)pat_rep.h 8.1 (Berkeley) 5/31/93 + */ + +#include +/* + * data structure for storing user supplied replacement strings (-s) + */ +typedef struct replace { + char *nstr; /* the new string we will substitute with */ + regex_t rcmp; /* compiled regular expression used to match */ + int flgs; /* print conversions? global in operation? */ +#define PRNT 0x1 +#define GLOB 0x2 +#define RENM 0x4 +#define SYML 0x8 + struct replace *fow; /* pointer to next pattern */ +} REPLACE; diff --git a/bin/pax/pax.1 b/bin/pax/pax.1 new file mode 100644 index 0000000..8203a94 --- /dev/null +++ b/bin/pax/pax.1 @@ -0,0 +1,1304 @@ +.\" $NetBSD: pax.1,v 1.69 2017/07/03 21:33:23 wiz Exp $ +.\" +.\" Copyright (c) 1992 Keith Muller. +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Keith Muller of the University of California, San Diego. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)pax.1 8.4 (Berkeley) 4/18/94 +.\" +.Dd August 12, 2016 +.Dt PAX 1 +.Os +.Sh NAME +.Nm pax +.Nd read and write file archives and copy directory hierarchies +.Sh SYNOPSIS +.Nm +.Op Fl 0cdjnOVvz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl N Ar dbdir +.Op Fl s Ar replstr +.Ar ...\& +.Op Fl U Ar user +.Ar ...\& +.Op Fl G Ar group +.Ar ...\& +.Oo +.Fl T +.Sm off +.Op Ar from_date +.Oo , Ar to_date Oc +.Sm on +.Oc +.Ar ...\& +.Op Ar pattern ...\& +.Nm +.Fl r +.Op Fl AcDdijknOuVvYZz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl N Ar dbdir +.Op Fl o Ar options +.Ar ...\& +.Op Fl p Ar string +.Ar ...\& +.Op Fl s Ar replstr +.Ar ...\& +.Op Fl U Ar user +.Ar ...\& +.Op Fl G Ar group +.Ar ...\& +.Oo +.Fl T +.Sm off +.Op Ar from_date +.Oo , Ar to_date Oc +.Sm on +.Oc +.Ar ...\& +.Op Ar pattern ...\& +.Nm +.Fl w +.Op Fl AdHijLMOPtuVvXz +.Op Fl b Ar blocksize +.Oo +.Op Fl a +.Op Fl f Ar archive +.Oc +.Op Fl x Ar format +.Op Fl B Ar bytes +.Op Fl N Ar dbdir +.Op Fl o Ar options +.Ar ...\& +.Op Fl s Ar replstr +.Ar ...\& +.Op Fl U Ar user +.Ar ...\& +.Op Fl G Ar group +.Ar ...\& +.Oo +.Fl T +.Sm off +.Op Ar from_date +.Oo , Ar to_date Oc +.Oo /[ Cm c ] [ Cm m ] Oc +.Sm on +.Oc +.Ar ...\& +.Op Ar file ...\& +.Nm +.Fl r +.Fl w +.Op Fl ADdHijkLlMnOPtuVvXYZz +.Op Fl N Ar dbdir +.Op Fl p Ar string +.Ar ...\& +.Op Fl s Ar replstr +.Ar ...\& +.Op Fl U Ar user +.Ar ...\& +.Op Fl G Ar group +.Ar ...\& +.Oo +.Fl T +.Sm off +.Op Ar from_date 
+.Oo , Ar to_date Oc +.Oo /[ Cm c ] [ Cm m ] Oc +.Sm on +.Oc +.Ar ...\& +.Op Ar file ...\& +.Ar directory +.Sh DESCRIPTION +.Nm +will read, write, and list the members of an archive file, +and will copy directory hierarchies. +If the archive file is of the form: +.Ar [[user@]host:]file +then the archive will be processed using +.Xr rmt 8 . +.Pp +.Nm +operation is independent of the specific archive format, +and supports a wide variety of different archive formats. +A list of supported archive formats can be found under the description of the +.Fl x +option. +.Pp +The presence of the +.Fl r +and the +.Fl w +options specifies which of the following functional modes +.Nm +will operate under: +.Em list , read , write , +and +.Em copy . +.Bl -tag -width 6n +.It Aq none +.Em List . +.Nm +will write to +.Dv standard output +a table of contents of the members of the archive file read from +.Dv standard input , +whose pathnames match the specified +.Ar patterns . +The table of contents contains one filename per line +and is written using single line buffering. +.It Fl r +.Em Read . +.Nm +extracts the members of the archive file read from the +.Dv standard input , +with pathnames matching the specified +.Ar patterns . +The archive format and blocking is automatically determined on input. +When an extracted file is a directory, the entire file hierarchy +rooted at that directory is extracted. +All extracted files are created relative to the current file hierarchy. +The setting of ownership, access and modification times, and file mode of +the extracted files are discussed in more detail under the +.Fl p +option. +.It Fl w +.Em Write . +.Nm +writes an archive containing the +.Ar file +operands to +.Dv standard output +using the specified archive format. +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +.Dv standard input . 
+When a +.Ar file +operand is also a directory, the entire file hierarchy rooted +at that directory will be included. +.It Fl r Fl w +.Em Copy . +.Nm +copies the +.Ar file +operands to the destination +.Ar directory . +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +the +.Dv standard input . +When a +.Ar file +operand is also a directory the entire file +hierarchy rooted at that directory will be included. +The effect of the +.Em copy +is as if the copied files were written to an archive file and then +subsequently extracted, except that there may be hard links between +the original and the copied files (see the +.Fl l +option below). +.Pp +.Em Warning : +The destination +.Ar directory +must not be one of the +.Ar file +operands or a member of a file hierarchy rooted at one of the +.Ar file +operands. +The result of a +.Em copy +under these conditions is unpredictable. +.El +.Pp +While processing a damaged archive during a +.Em read +or +.Em list +operation, +.Nm +will attempt to recover from media defects and will search through the archive +to locate and process the largest number of archive members possible (see the +.Fl E +option for more details on error handling). +.Sh OPERANDS +The +.Ar directory +operand specifies a destination directory pathname. +If the +.Ar directory +operand does not exist, or it is not writable by the user, +or it is not of type directory, +.Nm +will exit with a non-zero exit status. +.Pp +The +.Ar pattern +operand is used to select one or more pathnames of archive members. +Archive members are selected using the pattern matching notation described +by +.Xr fnmatch 3 . +When the +.Ar pattern +operand is not supplied, all members of the archive will be selected. +When a +.Ar pattern +matches a directory, the entire file hierarchy rooted at that directory will +be selected. 
+When a +.Ar pattern +operand does not select at least one archive member, +.Nm +will write these +.Ar pattern +operands in a diagnostic message to +.Dv standard error +and then exit with a non-zero exit status. +.Pp +The +.Ar file +operand specifies the pathname of a file to be copied or archived. +When a +.Ar file +operand does not select at least one archive member, +.Nm +will write these +.Ar file +operand pathnames in a diagnostic message to +.Dv standard error +and then exit with a non-zero exit status. +.Sh OPTIONS +The following options are supported: +.Bl -tag -width 4n +.It Fl r +Read an archive file from +.Dv standard input +and extract the specified +.Ar files . +If any intermediate directories are needed in order to extract an archive +member, these directories will be created as if +.Xr mkdir 2 +was called with the bitwise inclusive +.Dv OR +of +.Dv S_IRWXU , S_IRWXG , +and +.Dv S_IRWXO +as the mode argument. +When the selected archive format supports the specification of linked +files and these files cannot be linked while the archive is being extracted, +.Nm +will write a diagnostic message to +.Dv standard error +and exit with a non-zero exit status at the completion of operation. +.It Fl w +Write files to the +.Dv standard output +in the specified archive format. +When no +.Ar file +operands are specified, +.Dv standard input +is read for a list of pathnames with one per line without any leading or +trailing +.Aq blanks . +.It Fl a +Append +.Ar files +to the end of an archive that was previously written. +If an archive format is not specified with a +.Fl x +option, the format currently being used in the archive will be selected. +Any attempt to append to an archive in a format different from the +format already used in the archive will cause +.Nm +to exit immediately +with a non-zero exit status. +The blocking size used in the archive volume where writing starts +will continue to be used for the remainder of that archive volume. 
+.Pp +.Em Warning : +Many storage devices are not able to support the operations necessary +to perform an append operation. +Any attempt to append to an archive stored on such a device may damage the +archive or have other unpredictable results. +Tape drives in particular are more likely to not support an append operation. +An archive stored in a regular file system file or on a disk device will +usually support an append operation. +.It Fl b Ar blocksize +When +.Em writing +an archive, +block the output at a positive decimal integer number of +bytes per write to the archive file. +The +.Ar blocksize +must be a multiple of 512 bytes with a maximum of 32256 bytes. +A +.Ar blocksize +can end with +.Li k +or +.Li b +to specify multiplication by 1024 (1K) or 512, respectively. +A pair of +.Ar blocksizes +can be separated by +.Li x +to indicate a product. +A specific archive device may impose additional restrictions on the size +of blocking it will support. +When blocking is not specified, the default +.Ar blocksize +is dependent on the specific archive format being used (see the +.Fl x +option). +.It Fl c +Match all file or archive members +.Em except +those specified by the +.Ar pattern +and +.Ar file +operands. +.It Fl d +Cause files of type directory being copied or archived, or archive members of +type directory being extracted, to match only the directory file or archive +member and not the file hierarchy rooted at the directory. +.It Fl f Ar archive +Specify +.Ar archive +as the pathname of the input or output archive, overriding the default +.Dv standard input +(for +.Em list +and +.Em read ) +or +.Dv standard output +(for +.Em write ) . +A single archive may span multiple files and different archive devices. +When required, +.Nm +will prompt for the pathname of the file or device of the next volume in the +archive. +.It Fl i +Interactively rename files or archive members. 
+For each archive member matching a +.Ar pattern +operand or each file matching a +.Ar file +operand, +.Nm +will prompt to +.Pa /dev/tty +giving the name of the file, its file mode and its modification time. +.Nm +will then read a line from +.Pa /dev/tty . +If this line is blank, the file or archive member is skipped. +If this line consists of a single period, the +file or archive member is processed with no modification to its name. +Otherwise, its name is replaced with the contents of the line. +.Nm +will immediately exit with a non-zero exit status if +.Aq Dv EOF +is encountered when reading a response or if +.Pa /dev/tty +cannot be opened for reading and writing. +.It Fl j +Use +.Xr bzip2 1 +for compression when reading or writing archive files. +.It Fl k +Do not overwrite existing files. +.It Fl l +Link files. +(The letter ell). +In the +.Em copy +mode +.Fl ( r +.Fl w ) , +hard links are made between the source and destination file hierarchies +whenever possible. +.It Fl n +Select the first archive member that matches each +.Ar pattern +operand. +No more than one archive member is matched for each +.Ar pattern . +When members of type directory are matched, the file hierarchy rooted at that +directory is also matched (unless +.Fl d +is also specified). +.It Fl o Ar options +Information to modify the algorithm for extracting or writing archive files +which is specific to the archive format specified by +.Fl x . +In general, +.Ar options +take the form: +.Cm name=value +.It Fl p Ar string +Specify one or more file characteristic options (privileges). +The +.Ar string +option-argument is a string specifying file characteristics to be retained or +discarded on extraction. +The string consists of the specification characters +.Cm a , e , +.Cm m , o , +and +.Cm p . +Multiple characteristics can be concatenated within the same string +and multiple +.Fl p +options can be specified. 
+The meaning of the specification characters are as follows: +.Bl -tag -width 2n +.It Cm a +Do not preserve file access times. +By default, file access times are preserved whenever possible. +.It Cm e +.Sq Preserve everything , +the user ID, group ID, file mode bits, +file access time, and file modification time. +This is intended to be used by +.Em root , +someone with all the appropriate privileges, in order to preserve all +aspects of the files as they are recorded in the archive. +The +.Cm e +flag is the sum of the +.Cm o +and +.Cm p +flags. +.\" .It Cm f +.\" Do not preserve file flags. +.\" By default, file flags are preserved whenever possible. +.It Cm m +Do not preserve file modification times. +By default, file modification times are preserved whenever possible. +.It Cm o +Preserve the user ID and group ID. +.It Cm p +.Sq Preserve +the file mode bits. +This is intended to be used by a +.Em user +with regular privileges who wants to preserve all aspects of the file other +than the ownership. +The file times are preserved by default, but two other flags are offered to +disable this and use the time of extraction instead. +.El +.Pp +In the preceding list, +.Sq preserve +indicates that an attribute stored in the archive is given to the +extracted file, subject to the permissions of the invoking +process. +Otherwise the attribute of the extracted file is determined as +part of the normal file creation action. +If neither the +.Cm e +nor the +.Cm o +specification character is specified, or the user ID and group ID are not +preserved for any reason, +.Nm +will not set the +.Dv S_ISUID +.Em ( setuid ) +and +.Dv S_ISGID +.Em ( setgid ) +bits of the file mode. +If the preservation of any of these items fails for any reason, +.Nm +will write a diagnostic message to +.Dv standard error . +Failure to preserve these items will affect the final exit status, +but will not cause the extracted file to be deleted. 
+If the file characteristic letters in any of the string option-arguments are +duplicated or conflict with each other, the one(s) given last will take +precedence. +For example, if +.Dl Fl p Ar eme +is specified, file modification times are still preserved. +.It Fl s Ar replstr +Modify the file or archive member names specified by the +.Ar pattern +or +.Ar file +operands according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +The format of these regular expressions are: +.Dl /old/new/[gp] +As in +.Xr ed 1 , +.Cm old +is a basic regular expression and +.Cm new +can contain an ampersand (&), \en (where n is a digit) back-references, +or subexpression matching. +The +.Cm old +string may also contain +.Aq Dv newline +characters. +Any non-null character can be used as a delimiter (/ is shown here). +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +.Dv standard error +in the following format: +.Dl Ao "original pathname" Ac >> Ao "new pathname" Ac +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl t +Reset the access times of any file or directory read or accessed by +.Nm +to be the same as they were before being read or accessed by +.Nm , +if the user has the appropriate permissions required by +.Xr utime 3 . 
+.It Fl u +Ignore files that are older (having a less recent file modification time) +than a pre-existing file or archive member with the same name. +During +.Em read , +an archive member with the same name as a file in the file system will be +extracted if the archive member is newer than the file. +During +.Em write , +a file system member with the same name as an archive member will be +written to the archive if it is newer than the archive member. +During +.Em copy , +the file in the destination hierarchy is replaced by the file in the source +hierarchy or by a link to the file in the source hierarchy if the file in +the source hierarchy is newer. +.It Fl v +During a +.Em list +operation, produce a verbose table of contents using the format of the +.Xr ls 1 +utility with the +.Fl l +option. +For pathnames representing a hard link to a previous member of the archive, +the output has the format: +.Dl Ao "ls -l listing" Ac == Ao "link name" Ac +Where +.Aq "ls -l listing" +is the output format specified by the +.Xr ls 1 +utility when used with the +.Fl l +option. +.Pp +Otherwise for all the other operational modes +.Em ( read , write , +and +.Em copy ) , +pathnames are written and flushed to +.Dv standard error +without a trailing +.Aq Dv newline +as soon as processing begins on that file or +archive member. +The trailing +.Aq Dv newline , +is not buffered, and is written only after the file has been read or written. +.Pp +A final summary of archive operations is printed after they have been +completed. +.It Fl x Ar format +Specify the output archive format, with the default format being +.Ar ustar . +.Nm +currently supports the following formats: +.Bl -tag -width "sv4cpio" +.It Ar cpio +The extended cpio interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 5120 bytes. 
+Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm +and is repaired. +.It Ar bcpio +The old binary cpio format. +The default blocksize for this format is 5120 bytes. +This format is not very portable and should not be used when other formats +are available. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm +and is repaired. +.It Ar sv4cpio +The +.At V.4 +cpio. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm +and is repaired. +.It Ar sv4crc +The +.At V.4 +cpio with file crc checksums. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format) which may be truncated by this format is detected by +.Nm +and is repaired. +.It Ar tar +The old +.Bx +tar format as found in +.Bx 4.3 . +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 100 characters or less in length. +Only +.Em regular +files, +.Em hard links , soft links , +and +.Em directories +will be archived (other file types are not supported). +For backward compatibility with even older tar formats, a +.Fl o +option can be used when writing an archive to omit the storage of directories. +This option takes the form: +.Dl Fl o Cm write_opt=nodir +.It Ar ustar +The extended tar interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 250 characters or less in length. +.El +.Pp +.Nm +will detect and report any file that it is unable to store or extract +as the result of any specific archive format restrictions. 
+The individual archive formats may impose additional restrictions on use. +Typical archive format restrictions include (but are not limited to): +file pathname length, file size, link pathname length and the type of the file. +.It Fl Fl gnu +Recognize GNU tar extensions. +.It Fl Fl timestamp Ar timestamp +Store all modification times in the archive with the +.Ar timestamp +given instead of the actual modification time of the individual archive member +so that repeatable builds are possible. +The +.Ar timestamp +can be a +.Pa pathname , +where the timestamps are derived from that file, a parseable date for +.Xr parsedate 3 +(this option is not yet available in the tools build), or an integer value +interpreted as the number of seconds from the Epoch. +.It Fl Fl xz +Use +.Xr xz 1 +compression, when reading or writing archive files. +.It Fl z +Use +.Xr gzip 1 +compression, when reading or writing archive files. +.It Fl A +Do not strip leading `/'s from file names. +.It Fl B Ar bytes +Limit the number of bytes written to a single archive volume to +.Ar bytes . +The +.Ar bytes +limit can end with +.Li m , +.Li k , +or +.Li b +to specify multiplication by 1048576 (1M), 1024 (1K) or 512, respectively. +A pair of +.Ar bytes +limits can be separated by +.Li x +to indicate a product. +.Pp +.Em Warning : +Only use this option when writing an archive to a device which supports +an end of file read condition based on last (or largest) write offset +(such as a regular file or a tape drive). +The use of this option with a floppy or hard disk is not recommended. +.It Fl D +This option is the same as the +.Fl u +option, except that the file inode change time is checked instead of the +file modification time. +The file inode change time can be used to select files whose inode information +(e.g. uid, gid, etc.) is newer than a copy of the file in the destination +.Ar directory . 
+.It Fl E Ar limit +Limit the number of consecutive read faults while trying to read a flawed +archive to +.Ar limit . +With a positive +.Ar limit , +.Nm +will attempt to recover from an archive read error and will +continue processing starting with the next file stored in the archive. +A +.Ar limit +of 0 will cause +.Nm +to stop operation after the first read error is detected on an archive volume. +A +.Ar limit +of +.Li NONE +will cause +.Nm +to attempt to recover from read errors forever. +The default +.Ar limit +is a small positive number of retries. +.Pp +.Em Warning : +Using this option with +.Li NONE +should be done with extreme caution as +.Nm +may get stuck in an infinite loop on a very badly flawed archive. +.It Fl G Ar group +Select a file based on its +.Ar group +name, or when starting with a +.Cm # , +a numeric gid. +A '\e' can be used to escape the +.Cm # . +Multiple +.Fl G +options may be supplied and checking stops with the first match. +.It Fl H +Follow only command line symbolic links while performing a physical file +system traversal. +.It Fl L +Follow all symbolic links to perform a logical file system traversal. +.It Fl M +During a +.Em write +or +.Em copy +operation, treat the list of files on +.Dv standard input +as an +.Xr mtree 8 +.Sq specfile +specification, and write or copy only those items in the specfile. +.Pp +If the file exists in the underlying file system, its permissions and +modification time will be used unless specifically overridden by the specfile. +An error will be raised if the type of entry in the specfile conflicts +with that of an existing file. +A directory entry that is marked +.Sq Sy optional +will not be copied (even though its contents will be).
+.Pp +Otherwise, the entry will be +.Sq faked-up , +and it is necessary to specify at least the following parameters +in the specfile: +.Sy type , +.Sy mode , +.Sy gname +or +.Sy gid , +and +.Sy uname +or +.Sy uid , +.Sy device +(in the case of block or character devices), and +.Sy link +(in the case of symbolic links). +If +.Sy time +isn't provided, the current time will be used. +A +.Sq faked-up +entry that is marked +.Sq Sy optional +will not be copied. +.It Fl N Ar dbdir +Except for lookups for the +.Fl G +and +.Fl U +options, +use the user database text file +.Pa master.passwd +and group database text file +.Pa group +from +.Ar dbdir , +rather than using the results from the system's +.Xr getpwnam 3 +and +.Xr getgrnam 3 +(and related) library calls. +.It Fl O +Force the archive to be one volume. +If a volume ends prematurely, +.Nm +will not prompt for a new volume. +This option can be useful for +automated tasks where error recovery cannot be performed by a human. +.It Fl P +Do not follow symbolic links, perform a physical file system traversal. +This is the default mode. +.It Fl T Ar [from_date][,to_date][/[c][m]] +Allow files to be selected based on a file modification or inode change +time falling within a specified time range of +.Ar from_date +to +.Ar to_date +(the dates are inclusive). +If only a +.Ar from_date +is supplied, all files with a modification or inode change time +equal to or younger are selected. +If only a +.Ar to_date +is supplied, all files with a modification or inode change time +equal to or older will be selected. +When the +.Ar from_date +is equal to the +.Ar to_date , +only files with a modification or inode change time of exactly that +time will be selected. +.Pp +When +.Nm +is in the +.Em write +or +.Em copy +mode, the optional trailing field +.Ar [c][m] +can be used to determine which file time (inode change, file modification or +both) are used in the comparison. 
+If neither is specified, the default is to use file modification time only. +The +.Ar m +specifies the comparison of file modification time (the time when +the file was last written). +The +.Ar c +specifies the comparison of inode change time (the time when the file +inode was last changed; e.g. a change of owner, group, mode, etc). +When +.Ar c +and +.Ar m +are both specified, then the modification and inode change times are +both compared. +The inode change time comparison is useful in selecting files whose +attributes were recently changed or selecting files which were recently +created and had their modification time reset to an older time (as +happens when a file is extracted from an archive and the modification time +is preserved). +Time comparisons using both file times are useful when +.Nm +is used to create a time based incremental archive (only files that were +changed during a specified time range will be archived). +.Pp +A time range is made up of seven different fields and each field must contain +two digits. +The format is: +.Dl [[[[[cc]yy]mm]dd]hh]mm[\&.ss] +where +.Cm cc +is the first two digits of the year (the century), +.Cm yy +is the last two digits of the year, +the first +.Cm mm +is the month (from 01 to 12), +.Cm dd +is the day of the month (from 01 to 31), +.Cm hh +is the hour of the day (from 00 to 23), +the second +.Cm mm +is the minute (from 00 to 59), +and +.Cm ss +is the seconds (from 00 to 61). +Only the minute field +.Cm mm +is required; the others will default to the current system values. +The +.Cm ss +field may be added independently of the other fields. +If the century is not specified, it defaults to 1900 for +years between 69 and 99, or 2000 for years between 0 and 68. +Time ranges are relative to the current time, so +.Dl Fl T Ar 1234/cm +would select all files with a modification or inode change time +of 12:34 PM today or later. +Multiple +.Fl T +time ranges can be supplied and checking stops with the first match.
+.It Fl U Ar user +Select a file based on its +.Ar user +name, or when starting with a +.Cm # , +a numeric uid. +A '\e' can be used to escape the +.Cm # . +Multiple +.Fl U +options may be supplied and checking stops with the first match. +.It Fl V +A final summary of archive operations is printed after they have been +completed. +Some potentially long-running tape operations are noted. +.It Fl X +When traversing the file hierarchy specified by a pathname, +do not descend into directories that have a different device ID. +See the +.Li st_dev +field as described in +.Xr stat 2 +for more information about device ID's. +.It Fl Y +This option is the same as the +.Fl D +option, except that the inode change time is checked using the +pathname created after all the file name modifications have completed. +.It Fl Z +This option is the same as the +.Fl u +option, except that the modification time is checked using the +pathname created after all the file name modifications have completed. +.It Fl 0 +Use the nul character instead of \en as the file separator when reading +files from standard input. +.It Fl Fl force-local +Do not interpret filenames that contain a `:' as remote files. +.It Fl Fl insecure +Normally +.Nm +ignores filenames that contain +.Dq .. +as a path component. +With this option, +files that contain +.Dq .. +can be processed. +.It Fl Fl use-compress-program +Use the named program as the program to decompress the input or compress +the output. +.El +.Pp +The options that operate on the names of files or archive members +.Fl ( c , +.Fl i , +.Fl n , +.Fl s , +.Fl u , +.Fl v , +.Fl D , +.Fl G , +.Fl T , +.Fl U , +.Fl Y , +and +.Fl Z ) +interact as follows. +.Pp +When extracting files during a +.Em read +operation, archive members are +.Sq selected , +based only on the user specified pattern operands as modified by the +.Fl c , +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +.Fl U +options. 
+Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then the +.Fl Y +and +.Fl Z +options will be applied based on the final pathname. +Finally the +.Fl v +option will write the names resulting from these modifications. +.Pp +When archiving files during a +.Em write +operation, or copying files during a +.Em copy +operation, archive members are +.Sq selected , +based only on the user specified pathnames as modified by the +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +and +.Fl U +options (the +.Fl D +option only applies during a copy operation). +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then during a +.Em copy +operation the +.Fl Y +and the +.Fl Z +options will be applied based on the final pathname. +Finally the +.Fl v +option will write the names resulting from these modifications. +.Pp +When one or both of the +.Fl u +or +.Fl D +options are specified along with the +.Fl n +option, a file is not considered selected unless it is newer +than the file to which it is compared. +.Sh EXIT STATUS +.Nm +will exit with one of the following values: +.Bl -tag -width 2n +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Pp +Whenever +.Nm +cannot create a file or a link when reading an archive or cannot +find a file when writing an archive, or cannot preserve the user ID, +group ID, or file mode when the +.Fl p +option is specified, a diagnostic message is written to +.Dv standard error +and a non-zero exit status will be returned, but processing will continue. +In the case where pax cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated by +a signal or error, +.Nm +may have only partially extracted a file the user wanted. 
+Additionally, the file modes of extracted files and directories +may have incorrect file bits, and the modification and access times may be +wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal or error, +.Nm +may have only partially created the archive which may violate the specific +archive format specification. +.Pp +If while doing a +.Em copy , +.Nm +detects a file is about to overwrite itself, the file is not copied, +a diagnostic message is written to +.Dv standard error +and when +.Nm +completes it will exit with a non-zero exit status. +.Sh EXAMPLES +The command: +.Dl pax -w -f /dev/rst0 \&. +copies the contents of the current directory to the device +.Pa /dev/rst0 . +.Pp +The command: +.Dl pax -v -f filename +gives the verbose table of contents for an archive stored in +.Pa filename . +.Pp +The following commands: +.Dl mkdir newdir +.Dl cd olddir +.Dl pax -rw -pp .\ ../newdir +will copy the entire +.Pa olddir +directory hierarchy to +.Pa newdir , +preserving permissions and access times. +.Pp +When running as root, one may also wish to preserve file +ownership when copying directory trees. +This can be done with the following commands: +.Dl cd olddir +.Dl pax -rw -pe .\ ../newdir +which will copy the contents of +.Pa olddir +into +.Pa ../newdir , +preserving ownership, permissions and access times. +.Pp +The command: +.Dl pax -r -s ',^//*usr//*,,' -f a.pax +reads the archive +.Pa a.pax , +with all files rooted in ``/usr'' into the archive extracted relative to the +current directory. +.Pp +The command: +.Dl pax -rw -i .\ dest_dir +can be used to interactively select the files to copy from the current +directory to +.Pa dest_dir . +.Pp +The command: +.Dl pax -r -pe -U root -G bin -f a.pax +will extract all files from the archive +.Pa a.pax +which are owned by +.Em root +with group +.Em bin +and will preserve all file permissions. 
+.Pp +The command: +.Dl pax -r -w -v -Y -Z home /backup +will update (and list) only those files in the destination directory +.Pa /backup +which are older (less recent inode change or file modification times) than +files with the same name found in the source file tree +.Pa home . +.Sh SEE ALSO +.Xr cpio 1 , +.Xr tar 1 , +.Xr symlink 7 , +.Xr mtree 8 +.Sh STANDARDS +The +.Nm +utility is a superset of the +.St -p1003.2 +standard. +The options +.Fl B , +.Fl D , +.Fl E , +.Fl G , +.Fl H , +.Fl L , +.Fl M , +.Fl O , +.Fl P , +.Fl T , +.Fl U , +.Fl Y , +.Fl Z , +.Fl z , +the archive formats +.Ar bcpio , +.Ar sv4cpio , +.Ar sv4crc , +.Ar tar , +and the flawed archive handling during +.Ar list +and +.Ar read +operations are extensions to the +.Tn POSIX +standard. +.Sh HISTORY +A +.Nm +utility appeared in +.Bx 4.4 . +.Sh AUTHORS +.An -nosplit +.An Keith Muller +at the University of California, San Diego. +.An Luke Mewburn +implemented +.Fl M . diff --git a/bin/pax/pax.c b/bin/pax/pax.c new file mode 100644 index 0000000..3906569 --- /dev/null +++ b/bin/pax/pax.c @@ -0,0 +1,492 @@ +/* $NetBSD: pax.c,v 1.48 2017/10/02 21:55:35 joerg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +__COPYRIGHT("@(#) Copyright (c) 1992, 1993\ + The Regents of the University of California. 
All rights reserved."); +#if 0 +static char sccsid[] = "@(#)pax.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: pax.c,v 1.48 2017/10/02 21:55:35 joerg Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +static int gen_init(void); + +/* + * PAX main routines, general globals and some simple start up routines + */ + +/* + * Variables that can be accessed by any routine within pax + */ +int act = ERROR; /* read/write/append/copy */ +FSUB *frmt = NULL; /* archive format type */ +int cflag; /* match all EXCEPT pattern/file */ +int cwdfd = -1; /* starting cwd */ +int dflag; /* directory member match only */ +int iflag; /* interactive file/archive rename */ +int kflag; /* do not overwrite existing files */ +int lflag; /* use hard links when possible */ +int nflag; /* select first archive member match */ +int tflag; /* restore access time after read */ +int uflag; /* ignore older modification time files */ +int vflag; /* produce verbose output */ +int Aflag; /* honor absolute path */ +int Dflag; /* same as uflag except inode change time */ +int Hflag; /* follow command line symlinks (write only) */ +int Lflag; /* follow symlinks when writing */ +int Mflag; /* treat stdin as an mtree(8) specfile */ +int Vflag; /* produce somewhat verbose output (no listing) */ +int Xflag; /* archive files with same device id only */ +int Yflag; /* same as Dflg except after name mode */ +int Zflag; /* same as uflg except after name mode */ +int vfpart; /* is partial verbose output in progress */ +int patime = 1; /* preserve file access time */ +int pmtime = 1; /* preserve file modification times */ +int nodirs; /* do not create directories as needed */ +int pfflags = 1; /* preserve file flags */ +int pmode; /* preserve file mode bits */ +int pids; /* preserve file uid/gid */ +int rmleadslash = 0; /* remove leading '/' 
from pathnames */ +int exit_val; /* exit value */ +int docrc; /* check/create file crc */ +int to_stdout; /* extract to stdout */ +char *dirptr; /* destination dir in a copy */ +char *ltmfrmt; /* -v locale time format (if any) */ +const char *argv0; /* root of argv[0] */ +sigset_t s_mask; /* signal mask for cleanup critical sect */ +FILE *listf; /* file pointer to print file list to */ +char *tempfile; /* tempfile to use for mkstemp(3) */ +char *tempbase; /* basename of tempfile to use for mkstemp(3) */ +int forcelocal; /* force local operation even if the name + * contains a : + */ +int secure = 1; /* don't extract names that contain .. */ + +/* + * PAX - Portable Archive Interchange + * + * A utility to read, write, and write lists of the members of archive + * files and copy directory hierarchies. A variety of archive formats + * are supported (some are described in POSIX 1003.1 10.1): + * + * ustar - 10.1.1 extended tar interchange format + * cpio - 10.1.2 extended cpio interchange format + * tar - old BSD 4.3 tar format + * binary cpio - old cpio with binary header format + * sysVR4 cpio - with and without CRC + * + * This version is a superset of IEEE Std 1003.2b-d3 + * + * Summary of Extensions to the IEEE Standard: + * + * 1 READ ENHANCEMENTS + * 1.1 Operations which read archives will continue to operate even when + * processing archives which may be damaged, truncated, or fail to meet + * format specs in several different ways. Damaged sections of archives + * are detected and avoided if possible. Attempts will be made to resync + * archive read operations even with badly damaged media. + * 1.2 Blocksize requirements are not strictly enforced on archive read. + * Tapes which have variable sized records can be read without errors. + * 1.3 The user can specify via the non-standard option flag -E if error + * resync operation should stop on a media error, try a specified number + * of times to correct, or try to correct forever. 
+ * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks + * of all zeros) will be restored with holes appropriate for the target + * filesystem + * 1.5 The user is notified whenever something is found during archive + * read operations which violates spec (but the read will continue). + * 1.6 Multiple archive volumes can be read and may span over different + * archive devices + * 1.7 Rigidly restores all file attributes exactly as they are stored on the + * archive. + * 1.8 Modification time ranges can be specified via multiple -T + * options. These allow a user to select files whose modification time + * lies within a specific time range. + * 1.9 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 1.10 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 1.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 2 WRITE ENHANCEMENTS + * 2.1 Write operation will stop instead of allowing a user to create a + * flawed archive (due to any problem). + * 2.2 Archives written by pax are forced to strictly conform to both the + * pax and the specific archive format specifications. + * 2.3 Blocking size and format is rigidly enforced on writes. + * 2.4 Formats which may exhibit header overflow problems (they have fields + * too small for large file systems, such as inode number storage), use + * routines designed to repair this problem. These techniques still + * conform to both pax and format specifications, but no longer truncate + * these fields. This removes any restrictions on using these archive + * formats on large file systems. + * 2.5 Multiple archive volumes can be written and may span over different + * archive devices + * 2.6 An archive volume record limit allows the user to specify the number + * of bytes stored on an archive volume. When reached the user is + * prompted for the next archive volume.
This is specified with the + * non-standard -B flag. The limit is rounded up to the next blocksize. + * 2.7 All archive padding during write uses zero filled sections. This makes + * it much easier to pull data out of a flawed archive during read + * operations. + * 2.8 Access time reset with the -t applies to all file nodes (including + * directories). + * 2.9 Symbolic links can be followed with -L (optional in the spec). + * 2.10 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 2.11 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 2.12 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 2.13 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * + * 3 COPY ENHANCEMENTS + * 3.1 Sparse files (lseek holes) can be copied without expanding the holes + * into zero filled blocks. The file copy is created with holes which are + * appropriate for the target filesystem + * 3.2 Access time as well as modification time on copied file trees can be + * preserved with the appropriate -p options. + * 3.3 Access time reset with the -t applies to all file nodes (including + * directories). + * 3.4 Symbolic links can be followed with -L (optional in the spec). + * 3.5 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 3.6 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 3.7 Files can be selected based on group (group name or gid) via one or + * more -G options.
+ * 3.8 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * 3.9 File inode change time can be checked against existing file before + * name modification (-D) + * 3.10 File inode change time can be checked against existing file after + * name modification (-Y) + * 3.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 4 GENERAL ENHANCEMENTS + * 4.1 Internal structure is designed to isolate format dependent and + * independent functions. Formats are selected via a format driver table. + * This encourages the addition of new archive formats by only having to + * write those routines which id, read and write the archive header. + */ + +/* + * main() + * parse options, set up and operate as specified by the user. + * any operational flaw will set exit_val to non-zero + * Return: 0 if ok, 1 otherwise + */ + +int +main(int argc, char **argv) +{ + const char *tmpdir; + size_t tdlen; + int rval; + + setprogname(argv[0]); + + listf = stderr; + + /* + * parse options, determine operational mode + */ + options(argc, argv); + + /* + * general init + */ + if ((gen_init() < 0) || (tty_init() < 0)) + return exit_val; + + /* + * Keep a reference to cwd, so we can always come back home. + */ + cwdfd = open(".", O_RDONLY); + if (cwdfd < 0) { + syswarn(1, errno, "Can't open current working directory."); + return exit_val; + } + if (updatepath() == -1) + return exit_val; + + /* + * Where should we put temporary files? 
+ */ + if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') + tmpdir = _PATH_TMP; + tdlen = strlen(tmpdir); + while(tdlen > 0 && tmpdir[tdlen - 1] == '/') + tdlen--; + tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); + if (tempfile == NULL) { + tty_warn(1, "Cannot allocate memory for temp file name."); + return exit_val; + } + if (tdlen) + memcpy(tempfile, tmpdir, tdlen); + tempbase = tempfile + tdlen; + *tempbase++ = '/'; + + (void)time(&starttime); +#ifdef SIGINFO + (void)signal(SIGINFO, ar_summary); +#endif + /* + * select a primary operation mode + */ + switch (act) { + case EXTRACT: + rval = extract(); + break; + case ARCHIVE: + rval = archive(); + break; + case APPND: + if (gzip_program != NULL) + err(1, "cannot gzip while appending"); + rval = append(); + /* + * Check if we tried to append on an empty file and + * turned into ARCHIVE mode. + */ + if (act == -ARCHIVE) { + act = ARCHIVE; + rval = archive(); + } + break; + case COPY: + rval = copy(); + break; + default: + case LIST: + rval = list(); + break; + } + if (rval != 0) + exit_val = 1; + return exit_val; +} + +/* + * sig_cleanup() + * when interrupted we try to do whatever delayed processing we can. + * This is not critical, but we really ought to limit our damage when we + * are aborted by the user. + * Return: + * never.... + */ + +__dead static void +sig_cleanup(int which_sig) +{ + /* + * restore modes and times for any dirs we may have created + * or any dirs we may have read. Set vflag and vfpart so the user + * will clearly see the message on a line by itself. + */ + vflag = vfpart = 1; +#ifdef SIGXCPU + if (which_sig == SIGXCPU) + tty_warn(1, "CPU time limit reached, cleaning up."); + else +#endif + tty_warn(1, "Signal caught, cleaning up."); + + /* delete any open temporary file */ + if (xtmp_name) + (void)unlink(xtmp_name); + ar_close(); + proc_dir(); + if (tflag) + atdir_end(); + + (void)raise_default_signal(which_sig); + exit(1); +} + +/* + * gen_init() + * general setup routines. 
Not all are required, but they really help + * when dealing with a medium to large sized archives. + */ + +static int +gen_init(void) +{ + struct rlimit reslimit; + struct sigaction n_hand; + struct sigaction o_hand; + + /* + * Really needed to handle large archives. We can run out of memory for + * internal tables really fast when we have a whole lot of files... + */ + if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_DATA , &reslimit); + } + + /* + * should file size limits be waived? if the os limits us, this is + * needed if we want to write a large archive + */ + if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_FSIZE , &reslimit); + } + + /* + * increase the size the stack can grow to + */ + if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_STACK , &reslimit); + } + +#ifdef RLIMIT_RSS + /* + * not really needed, but doesn't hurt + */ + if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_RSS , &reslimit); + } +#endif + + /* + * Handle posix locale + * + * set user defines time printing format for -v option + */ + ltmfrmt = getenv("LC_TIME"); + + /* + * signal handling to reset stored directory times and modes. Since + * we deal with broken pipes via failed writes we ignore it. We also + * deal with any file size limit through failed writes. CPU time + * limits are caught and a cleanup is forced. 
+ */ + if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || + (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || + (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0)){ + tty_warn(1, "Unable to set up signal mask"); + return -1; + } +#ifdef SIGXCPU + if (sigaddset(&s_mask,SIGXCPU) < 0) { + tty_warn(1, "Unable to set up signal mask"); + return -1; + } +#endif +#ifdef SIGXFSZ + if (sigaddset(&s_mask,SIGXFSZ) < 0) { + tty_warn(1, "Unable to set up signal mask"); + return -1; + } +#endif + + memset(&n_hand, 0, sizeof n_hand); + n_hand.sa_mask = s_mask; + n_hand.sa_flags = 0; + n_hand.sa_handler = sig_cleanup; + + if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGHUP, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGTERM, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGINT, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGQUIT, &o_hand, &o_hand) < 0)) + goto out; + +#ifdef SIGXCPU + if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGXCPU, &o_hand, &o_hand) < 0)) + goto out; +#endif + n_hand.sa_handler = SIG_IGN; + if (sigaction(SIGPIPE, &n_hand, &o_hand) < 0) + goto out; +#ifdef SIGXFSZ + if (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0) + goto out; +#endif + return 0; + + out: + syswarn(1, errno, "Unable to set up signal handler"); + return -1; +} diff --git a/bin/pax/pax.h b/bin/pax/pax.h new file mode 100644 index 0000000..ccc0fb7 --- /dev/null +++ b/bin/pax/pax.h @@ -0,0 +1,283 @@ +/* $NetBSD: pax.h,v 1.31 2012/08/09 08:09:21 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. 
+ * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pax.h 8.2 (Berkeley) 4/18/94 + */ + +#if ! HAVE_NBTOOL_CONFIG_H +#define HAVE_LUTIMES 1 +#define HAVE_STRUCT_STAT_ST_FLAGS 1 +#endif + +/* + * BSD PAX global data structures and constants. 
#define MAXBLK		32256	/* MAX blocksize supported (posix SPEC) */
				/* WARNING: increasing MAXBLK past 32256 */
				/* will violate posix spec. */
#define BLKMULT		512	/* blocksize must be even mult of 512 bytes */
				/* Don't even think of changing this */
#define DEVBLK		8192	/* default read blksize for devices */
#define FILEBLK		10240	/* default read blksize for files */
#define PAXPATHLEN	3072	/* maximum path length for pax. MUST be */
				/* longer than the system MAXPATHLEN */

/*
 * Pax modes of operation
 */
#define ERROR		-1	/* nothing selected */
#define LIST		0	/* List the file in an archive */
#define EXTRACT		1	/* extract the files in an archive */
#define ARCHIVE		2	/* write a new archive */
#define APPND		3	/* append to the end of an archive */
#define COPY		4	/* copy files to destination dir */

/*
 * Device type of the current archive volume
 */
#define ISREG		0	/* regular file */
#define ISCHR		1	/* character device */
#define ISBLK		2	/* block device */
#define ISTAPE		3	/* tape drive */
#define ISPIPE		4	/* pipe/socket */
#ifdef SUPPORT_RMT
#define ISRMT		5	/* rmt */
#endif

/*
 * Pattern matching structure
 *
 * Used to store command line patterns. The patterns form a singly linked
 * list through the fow member.
 */
typedef struct pattern {
	char		*pstr;		/* pattern to match, user supplied */
	char		*pend;		/* end of a prefix match */
	char		*chdname;	/* the dir to change to if not NULL. */
	int		plen;		/* length of pstr */
	int		flgs;		/* processing/state flags */
#define MTCH		0x1		/* pattern has been matched */
#define DIR_MTCH	0x2		/* pattern matched a directory */
#define NOGLOB_MTCH	0x4		/* non-globbing match */
	struct pattern	*fow;		/* next pattern */
} PATTERN;
/*
 * General Archive Structure (used internal to pax)
 *
 * This structure is used to pass information about archive members between
 * the format independent routines and the format specific routines. When
 * new archive formats are added, they must accept requests and supply info
 * encoded in a structure of this type. The name fields are declared statically
 * here; as there is only ONE of these floating around, size is not a major
 * consideration. Eventually converting the name fields to a dynamic length
 * may be required if and when the supporting operating system removes all
 * restrictions on the length of pathnames it will resolve.
 */
typedef struct {
	int nlen;			/* file name length */
	char name[PAXPATHLEN+1];	/* file name */
	int ln_nlen;			/* link name length */
	char ln_name[PAXPATHLEN+1];	/* name to link to (if any) */
	char *org_name;			/* orig name in file system */
	char fts_name[PAXPATHLEN+1];	/* name from fts (for *org_name) */
	char *tmp_name;			/* tmp name used to restore */
	PATTERN *pat;			/* ptr to pattern match (if any) */
	struct stat sb;			/* stat buffer see stat(2) */
	off_t pad;			/* bytes of padding after file xfer */
	off_t skip;			/* bytes of real data after header */
					/* IMPORTANT. The st_size field does */
					/* not always indicate the amount of */
					/* data following the header. */
	u_long crc;			/* file crc */
	int type;			/* type of file node */
#define PAX_DIR		1		/* directory */
#define PAX_CHR		2		/* character device */
#define PAX_BLK		3		/* block device */
#define PAX_REG		4		/* regular file */
#define PAX_SLK		5		/* symbolic link */
#define PAX_SCK		6		/* socket */
#define PAX_FIF		7		/* fifo */
#define PAX_HLK		8		/* hard link */
#define PAX_HRG		9		/* hard link to a regular file */
#define PAX_CTG		10		/* high performance file */
#define PAX_GLL		11		/* GNU long symlink */
#define PAX_GLF		12		/* GNU long file */
} ARCHD;
/*
 * Format Specific Routine Table
 *
 * The format specific routine table allows new archive formats to be quickly
 * added. Overall pax operation is independent of the actual format used to
 * form the archive. Only those routines which deal directly with the archive
 * are tailored to the oddities of the specific format. All other routines are
 * independent of the archive format. Data flow in and out of the format
 * dependent routines pass pointers to ARCHD structure (described above).
 */
typedef struct {
	const char *name;	/* name of format, this is the name the user */
				/* gives to -x option to select it. */
	int bsz;		/* default block size. used when the user */
				/* does not specify a blocksize for writing */
				/* Appends continue with the blocksize */
				/* the archive is currently using. */
	int hsz;		/* Header size in bytes. this is the size of */
				/* the smallest header this format supports. */
				/* Headers are assumed to fit in a BLKMULT. */
				/* If they are bigger, get_head() and */
				/* get_arc() must be adjusted */
	int udev;		/* does append require unique dev/ino? some */
				/* formats use the device and inode fields */
				/* to specify hard links. when members in */
				/* the archive have the same inode/dev they */
				/* are assumed to be hard links. During */
				/* append we may have to generate unique ids */
				/* to avoid creating incorrect hard links */
	int hlk;		/* does archive store hard links info? if */
				/* not, we do not bother to look for them */
				/* during archive write operations */
	int blkalgn;		/* writes must be aligned to blkalgn boundary */
	int inhead;		/* is the trailer encoded in a valid header? */
				/* if not, trailers are assumed to be found */
				/* in invalid headers (i.e like tar) */
	int (*id)(char *, int);	/* checks if a buffer is a valid header */
				/* returns 1 if it is, o.w. returns a 0 */
	int (*st_rd)(void);	/* initialize routine for read. so format */
				/* can set up tables etc before it starts */
				/* reading an archive */
	int (*rd)		/* read header routine. passed a pointer to */
	    (ARCHD *, char *);	/* ARCHD. It must extract the info */
				/* from the format and store it in the ARCHD */
				/* struct. This routine is expected to fill */
				/* all the fields in the ARCHD (including */
				/* stat buf). 0 is returned when a valid */
				/* header is found. -1 when not valid. This */
				/* routine sets the skip and pad fields so */
				/* the format independent routines know the */
				/* amount of padding and the number of bytes */
				/* of data which follow the header. This info */
				/* is used to skip to the next file header */
	off_t (*end_rd)(void);	/* read cleanup. Allows format to clean up */
				/* and MUST RETURN THE LENGTH OF THE TRAILER */
				/* RECORD (so append knows how many bytes */
				/* to move back to rewrite the trailer) */
	int (*st_wr)(void);	/* initialize routine for write operations */
	int (*wr)(ARCHD *);	/* write archive header. Passed an ARCHD */
				/* filled with the specs on the next file to */
				/* be archived. Returns a 1 if no file data */
				/* is to be stored; 0 if file data is to be */
				/* added. A -1 is returned if a write */
				/* operation to the archive failed. this */
				/* function sets the skip and pad fields so */
				/* the proper padding can be added after */
				/* file data. This routine must NEVER write */
				/* a flawed archive header. */
	int (*end_wr)(void);	/* end write. write the trailer and do any */
				/* other format specific functions needed */
				/* at the end of an archive write */
	int (*trail)		/* returns 0 if a valid trailer, -1 if not */
	    (char *, int, int *);	/* For formats which encode the */
				/* trailer outside of a valid header, a */
				/* return value of 1 indicates that the block */
				/* passed to it can never contain a valid */
				/* header (skip this block, no point in */
				/* looking at it) */
	int (*subtrail)		/* this function is called for trailers */
	    (ARCHD *);		/* inside headers. */
	int (*rd_data)		/* read/process file data from the archive */
	    (ARCHD *, int, off_t *);
	int (*wr_data)		/* write/process file data to the archive */
	    (ARCHD *, int, off_t *);
	int (*options)(void);	/* process format specific options (-o) */
} FSUB;

/*
 * Format Specific Options List
 *
 * Used to pass format options to the format options handler
 */
typedef struct oplist {
	char		*name;		/* option variable name e.g. name= */
	char		*value;		/* value for option variable */
	struct oplist	*fow;		/* next option */
} OPLIST;

/*
 * General Macros
 */
#ifndef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif

#ifdef HOSTPROG
# include "pack_dev.h"			/* explicitly use NetBSD's macros */
# define MAJOR(x)	major_netbsd(x)
# define MINOR(x)	minor_netbsd(x)
# define TODEV(x, y)	makedev_netbsd((x), (y))
#else
# define MAJOR(x)	major(x)
# define MINOR(x)	minor(x)
# define TODEV(x, y)	makedev((x), (y))
#endif

/*
 * General Defines
 */
#define HEX		16
#define OCT		8
#define _PAX_		1

/*
 * Pathname base component of the temporary file template, to be created in
 * ${TMPDIR} or, as a fall-back, _PATH_TMP.
 */
#define _TFILE_BASE	"paxXXXXXXXXXX"

/*
 * Macros to manipulate off_t as uintmax_t
 */
#define OFFT_F		"%" PRIuMAX
#define OFFT_FP(x)	"%" x PRIuMAX
#define OFFT_T		uintmax_t
#define ASC_OFFT(x,y,z)		asc_umax(x,y,z)
#define OFFT_ASC(w,x,y,z)	umax_asc((uintmax_t)w,x,y,z)
#define OFFT_OCT(w,x,y,z)	umax_oct((uintmax_t)w,x,y,z)
#define STRTOOFFT(x,y,z)	strtoimax(x,y,z)
#define OFFT_MAX		INTMAX_MAX

/* masks selecting the upper and the lower 32 bits of a 64 bit value */
#define TOP_HALF	0xffffffff00000000ULL
#define BOTTOM_HALF	0x00000000ffffffffULL
All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)sel_subs.c 8.1 (Berkeley) 5/31/93"; +#else +__RCSID("$NetBSD: sel_subs.c,v 1.24 2011/08/31 16:24:54 plunky Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pax.h" +#include "sel_subs.h" +#include "extern.h" + +static int str_sec(const char *, time_t *); +static int usr_match(ARCHD *); +static int grp_match(ARCHD *); +static int trng_match(ARCHD *); + +static TIME_RNG *trhead = NULL; /* time range list head */ +static TIME_RNG *trtail = NULL; /* time range list tail */ +static USRT **usrtb = NULL; /* user selection table */ +static GRPT **grptb = NULL; /* group selection table */ + +/* + * Routines for selection of archive members + */ + +/* + * sel_chk() + * check if this file matches a specified uid, gid or time range + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +int +sel_chk(ARCHD *arcn) +{ + if (((usrtb != NULL) && usr_match(arcn)) || + ((grptb != NULL) && grp_match(arcn)) || + ((trhead != NULL) && trng_match(arcn))) + return 1; + return 0; +} + +/* + * User/group selection routines + * + * Routines to handle user selection of files based on the file uid/gid. To + * add an entry, the user supplies either the name or the uid/gid starting with + * a # on the command line. A \# will escape the #. 
+ */ + +/* + * usr_add() + * add a user match to the user match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +usr_add(char *str) +{ + u_int indx; + USRT *pt; + struct passwd *pw; + uid_t uid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return -1; + if ((usrtb == NULL) && + ((usrtb = (USRT **)calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { + tty_warn(1, + "Unable to allocate memory for user selection table"); + return -1; + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a user name, \# escapes # as first char in user name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((pw = getpwnam(str)) == NULL) { + tty_warn(1, "Unable to find uid for user: %s", str); + return -1; + } + uid = (uid_t)pw->pw_uid; + } else + uid = (uid_t)strtoul(str+1, NULL, 10); + endpwent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)uid) % USR_TB_SZ; + if ((pt = usrtb[indx]) != NULL) { + while (pt != NULL) { + if (pt->uid == uid) + return 0; + pt = pt->fow; + } + } + + /* + * uid is not yet in the table, add it to the front of the chain + */ + if ((pt = (USRT *)malloc(sizeof(USRT))) != NULL) { + pt->uid = uid; + pt->fow = usrtb[indx]; + usrtb[indx] = pt; + return 0; + } + tty_warn(1, "User selection table out of memory"); + return -1; +} + +/* + * usr_match() + * check if this files uid matches a selected uid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +usr_match(ARCHD *arcn) +{ + USRT *pt; + + /* + * hash and look for it in the table + */ + pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; + while (pt != NULL) { + if (pt->uid == arcn->sb.st_uid) + return 0; + pt = pt->fow; + } + + /* + * not found + */ + return 1; +} + +/* + * grp_add() + * add a group match to the group match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +grp_add(char *str) +{ + u_int indx; + GRPT *pt; + struct group *gr; + gid_t gid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return -1; + if ((grptb == NULL) && + ((grptb = (GRPT **)calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { + tty_warn(1, + "Unable to allocate memory fo group selection table"); + return -1; + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a group name, \# escapes # as first char in group name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((gr = getgrnam(str)) == NULL) { + tty_warn(1, + "Cannot determine gid for group name: %s", str); + return -1; + } + gid = (gid_t)gr->gr_gid; + } else + gid = (gid_t)strtoul(str+1, NULL, 10); + endgrent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)gid) % GRP_TB_SZ; + if ((pt = grptb[indx]) != NULL) { + while (pt != NULL) { + if (pt->gid == gid) + return 0; + pt = pt->fow; + } + } + + /* + * gid not in the table, add it to the front of the chain + */ + if ((pt = (GRPT *)malloc(sizeof(GRPT))) != NULL) { + pt->gid = gid; + pt->fow = grptb[indx]; + grptb[indx] = pt; + return 0; + } + tty_warn(1, "Group selection table out of memory"); + return -1; +} + +/* + * grp_match() + * check if this files gid matches a selected gid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +grp_match(ARCHD *arcn) +{ + GRPT *pt; + + /* + * hash and look for it in the table + */ + pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; + while (pt != NULL) { + if (pt->gid == arcn->sb.st_gid) + return 0; + pt = pt->fow; + } + + /* + * not found + */ + return 1; +} + +/* + * Time range selection routines + * + * Routines to handle user selection of files based on the modification and/or + * inode change time falling within a specified time range (the non-standard + * -T flag). The user may specify any number of different file time ranges. + * Time ranges are checked one at a time until a match is found (if at all). + * If the file has a mtime (and/or ctime) which lies within one of the time + * ranges, the file is selected. Time ranges may have a lower and/or a upper + * value. These ranges are inclusive. When no time ranges are supplied to pax + * with the -T option, all members in the archive will be selected by the time + * range routines. When only a lower range is supplied, only files with a + * mtime (and/or ctime) equal to or younger are selected. When only a upper + * range is supplied, only files with a mtime (and/or ctime) equal to or older + * are selected. When the lower time range is equal to the upper time range, + * only files with a mtime (or ctime) of exactly that time are selected. + */ + +/* + * trng_add() + * add a time range match to the time range list. + * This is a non-standard pax option. Lower and upper ranges are in the + * format: [yy[mm[dd[hh]]]]mm[.ss] and are comma separated. + * Time ranges are based on current time, so 1234 would specify a time of + * 12:34 today. 
+ * Return: + * 0 if the time range was added to the list, -1 otherwise + */ + +int +trng_add(char *str) +{ + TIME_RNG *pt; + char *up_pt = NULL; + char *stpt; + char *flgpt; + int dot = 0; + + /* + * throw out the badly formed time ranges + */ + if ((str == NULL) || (*str == '\0')) { + tty_warn(1, "Empty time range string"); + return -1; + } + + /* + * locate optional flags suffix /{cm}. + */ + if ((flgpt = strrchr(str, '/')) != NULL) + *flgpt++ = '\0'; + + for (stpt = str; *stpt != '\0'; ++stpt) { + if ((*stpt >= '0') && (*stpt <= '9')) + continue; + if ((*stpt == ',') && (up_pt == NULL)) { + *stpt = '\0'; + up_pt = stpt + 1; + dot = 0; + continue; + } + + /* + * allow only one dot per range (secs) + */ + if ((*stpt == '.') && (!dot)) { + ++dot; + continue; + } + tty_warn(1, "Improperly specified time range: %s", str); + goto out; + } + + /* + * allocate space for the time range and store the limits + */ + if ((pt = malloc(sizeof(TIME_RNG))) == NULL) { + tty_warn(1, "Unable to allocate memory for time range"); + return -1; + } + + /* + * by default we only will check file mtime, but user can specify + * mtime, ctime (inode change time) or both. 
+ */ + if ((flgpt == NULL) || (*flgpt == '\0')) + pt->flgs = CMPMTME; + else { + pt->flgs = 0; + while (*flgpt != '\0') { + switch(*flgpt) { + case 'M': + case 'm': + pt->flgs |= CMPMTME; + break; + case 'C': + case 'c': + pt->flgs |= CMPCTME; + break; + default: + tty_warn(1, "Bad option %c with time range %s", + *flgpt, str); + free(pt); + goto out; + } + ++flgpt; + } + } + + /* + * start off with the current time + */ + pt->low_time = pt->high_time = time(NULL); + if (*str != '\0') { + /* + * add lower limit + */ + if (str_sec(str, &(pt->low_time)) < 0) { + tty_warn(1, "Illegal lower time range %s", str); + free(pt); + goto out; + } + pt->flgs |= HASLOW; + } + + if ((up_pt != NULL) && (*up_pt != '\0')) { + /* + * add upper limit + */ + if (str_sec(up_pt, &(pt->high_time)) < 0) { + tty_warn(1, "Illegal upper time range %s", up_pt); + free(pt); + goto out; + } + pt->flgs |= HASHIGH; + + /* + * check that the upper and lower do not overlap + */ + if (pt->flgs & HASLOW) { + if (pt->low_time > pt->high_time) { + tty_warn(1, + "Upper %s and lower %s time overlap", + up_pt, str); + free(pt); + return -1; + } + } + } + + pt->fow = NULL; + if (trhead == NULL) { + trtail = trhead = pt; + return 0; + } + trtail->fow = pt; + trtail = pt; + return 0; + + out: + tty_warn(1, "Time range format is: [yy[mm[dd[hh]]]]mm[.ss][/[c][m]]"); + return -1; +} + +/* + * trng_match() + * check if this files mtime/ctime falls within any supplied time range. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +trng_match(ARCHD *arcn) +{ + TIME_RNG *pt; + + /* + * have to search down the list one at a time looking for a match. + * remember time range limits are inclusive. 
+ */ + pt = trhead; + while (pt != NULL) { + switch(pt->flgs & CMPBOTH) { + case CMPBOTH: + /* + * user wants both mtime and ctime checked for this + * time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPCTME: + /* + * user wants only ctime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPMTME: + default: + /* + * user wants only mtime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + } + break; + } + + if (pt == NULL) + return 1; + return 0; +} + +/* + * str_sec() + * Convert a time string in the format of [yy[mm[dd[hh]]]]mm[.ss] to gmt + * seconds. Tval already has current time loaded into it at entry. + * Return: + * 0 if converted ok, -1 otherwise + */ + +#define ATOI2(s) ((s) += 2, ((s)[-2] - '0') * 10 + ((s)[-1] - '0')) + +static int +str_sec(const char *p, time_t *tval) +{ + struct tm *lt; + const char *dot, *t; + int yearset, len; + + for (t = p, dot = NULL; *t; ++t) { + if (isdigit((unsigned char)*t)) + continue; + if (*t == '.' 
&& dot == NULL) { + dot = t; + continue; + } + return -1; + } + + lt = localtime(tval); + + if (dot != NULL) { + len = strlen(dot); + if (len != 3) + return -1; + ++dot; + lt->tm_sec = ATOI2(dot); + } else { + len = 0; + lt->tm_sec = 0; + } + + yearset = 0; + switch (strlen(p) - len) { + case 12: + lt->tm_year = ATOI2(p) * 100 - TM_YEAR_BASE; + yearset = 1; + /* FALLTHROUGH */ + case 10: + if (yearset) { + lt->tm_year += ATOI2(p); + } else { + yearset = ATOI2(p); + if (yearset < 69) + lt->tm_year = yearset + 2000 - TM_YEAR_BASE; + else + lt->tm_year = yearset + 1900 - TM_YEAR_BASE; + } + /* FALLTHROUGH */ + case 8: + lt->tm_mon = ATOI2(p); + --lt->tm_mon; + /* FALLTHROUGH */ + case 6: + lt->tm_mday = ATOI2(p); + /* FALLTHROUGH */ + case 4: + lt->tm_hour = ATOI2(p); + /* FALLTHROUGH */ + case 2: + lt->tm_min = ATOI2(p); + break; + default: + return -1; + } + + /* + * convert broken-down time to GMT clock time seconds + */ + if ((*tval = mktime(lt)) == -1) + return -1; + return 0; +} diff --git a/bin/pax/sel_subs.h b/bin/pax/sel_subs.h new file mode 100644 index 0000000..c91f3ea --- /dev/null +++ b/bin/pax/sel_subs.h @@ -0,0 +1,69 @@ +/* $NetBSD: sel_subs.h,v 1.6 2003/10/13 07:41:22 agc Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
/*
 * data structure for storing uid/grp selects (-U, -G non standard options)
 */

#define USR_TB_SZ	317		/* user selection table size */
#define GRP_TB_SZ	317		/* group selection table size */

/* one selected uid; entries with the same hash are chained through fow */
typedef struct usrt {
	uid_t uid;
	struct usrt *fow;		/* next uid */
} USRT;

/* one selected gid; entries with the same hash are chained through fow */
typedef struct grpt {
	gid_t gid;
	struct grpt *fow;		/* next gid */
} GRPT;

/*
 * data structure for storing user supplied time ranges (-T option)
 */

typedef struct time_rng {
	time_t		low_time;	/* lower inclusive time limit */
	time_t		high_time;	/* higher inclusive time limit */
	int		flgs;		/* option flags */
#define HASLOW		0x01		/* has lower time limit */
#define HASHIGH		0x02		/* has higher time limit */
#define CMPMTME		0x04		/* compare file modification time */
#define CMPCTME		0x08		/* compare inode change time */
#define CMPBOTH	(CMPMTME|CMPCTME)	/* compare inode and mod time */
	struct time_rng	*fow;		/* next time range */
} TIME_RNG;
a/bin/pax/tables.c b/bin/pax/tables.c new file mode 100644 index 0000000..dd135fe --- /dev/null +++ b/bin/pax/tables.c @@ -0,0 +1,1379 @@ +/* $NetBSD: tables.c,v 1.31 2013/10/18 19:53:34 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93"; +#else +__RCSID("$NetBSD: tables.c,v 1.31 2013/10/18 19:53:34 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "tables.h" +#include "extern.h" + +/* + * Routines for controlling the contents of all the different databases pax + * keeps. Tables are dynamically created only when they are needed. The + * goal was speed and the ability to work with HUGE archives. The databases + * were kept simple, but do have complex rules for when the contents change. + * As of this writing, the POSIX library functions were more complex than + * needed for this application (pax databases have very short lifetimes and + * do not survive after pax is finished). Pax is required to handle very + * large archives. These database routines carefully combine memory usage and + * temporary file storage in ways which will not significantly impact runtime + * performance while allowing the largest possible archives to be handled. + * Trying to force the fit to the POSIX database routines was not considered + * time well spent. 
+ */ + +static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ +static FTM **ftab = NULL; /* file time table for updating arch */ +static NAMT **ntab = NULL; /* interactive rename storage table */ +static DEVT **dtab = NULL; /* device/inode mapping tables */ +static ATDIR **atab = NULL; /* file tree directory time reset table */ +#ifdef DIRS_USE_FILE +static int dirfd = -1; /* storage for setting created dir time/mode */ +static u_long dircnt; /* entries in dir time/mode storage */ +#endif +static int ffd = -1; /* tmp file for file time table name storage */ + +static DEVT *chk_dev(dev_t, int); + +/* + * hard link table routines + * + * The hard link table tries to detect hard links to files using the device and + * inode values. We do this when writing an archive, so we can tell the format + * write routine that this file is a hard link to another file. The format + * write routine then can store this file in whatever way it wants (as a hard + * link if the format supports that like tar, or ignore this info like cpio). + * (Actually a field in the format driver table tells us if the format wants + * hard link info. if not, we do not waste time looking for them). We also use + * the same table when reading an archive. In that situation, this table is + * used by the format read routine to detect hard links from stored dev and + * inode numbers (like cpio). This will allow pax to create a link when one + * can be detected by the archive format. + */ + +/* + * lnk_start + * Creates the hard link table. + * Return: + * 0 if created, -1 if failure + */ + +int +lnk_start(void) +{ + if (ltab != NULL) + return 0; + if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { + tty_warn(1, "Cannot allocate memory for hard link table"); + return -1; + } + return 0; +} + +/* + * chk_lnk() + * Looks up entry in hard link hash table. If found, it copies the name + * of the file it is linked to (we already saw that file) into ln_name. 
+ * lnkcnt is decremented and if goes to 1 the node is deleted from the + * database. (We have seen all the links to this file). If not found, + * we add the file to the database if it has the potential for having + * hard links to other files we may process (it has a link count > 1) + * Return: + * if found returns 1; if not found returns 0; -1 on error + */ + +int +chk_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return -1; + /* + * ignore those nodes that cannot have hard links + */ + if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) + return 0; + + /* + * hash inode number and look for this file + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) != NULL) { + /* + * its hash chain is not empty, walk down looking for it + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found a link. set the node type and copy in the + * name of the file it is to link to. we need to + * handle hardlinks to regular files differently than + * other links. + */ + arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name, + sizeof(arcn->ln_name)); + if (arcn->type == PAX_REG) + arcn->type = PAX_HRG; + else + arcn->type = PAX_HLK; + + /* + * if we have found all the links to this file, remove + * it from the database + */ + if (--pt->nlink <= 1) { + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); + } + return 1; + } + } + + /* + * we never saw this file before. 
It has links so we add it to the + * front of this hash chain + */ + if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) { + if ((pt->name = strdup(arcn->name)) != NULL) { + pt->dev = arcn->sb.st_dev; + pt->ino = arcn->sb.st_ino; + pt->nlink = arcn->sb.st_nlink; + pt->fow = ltab[indx]; + ltab[indx] = pt; + return 0; + } + (void)free((char *)pt); + } + + tty_warn(1, "Hard link table out of memory"); + return -1; +} + +/* + * purg_lnk + * remove reference for a file that we may have added to the data base as + * a potential source for hard links. We ended up not using the file, so + * we do not want to accidentally point another file at it later on. + */ + +void +purg_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return; + /* + * do not bother to look if it could not be in the database + */ + if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || + (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + return; + + /* + * find the hash chain for this inode value, if empty return + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) == NULL) + return; + + /* + * walk down the list looking for the inode/dev pair, unlink and + * free if found + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + if (pt == NULL) + return; + + /* + * remove and free it + */ + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); +} + +/* + * lnk_end() + * pull apart a existing link table so we can reuse it. We do this between + * read and write phases of append with update. 
(The format may have
 *	used the link table, and we need to start with a fresh table for the
 *	write phase)
 */

void
lnk_end(void)
{
	int slot;
	HRDLNK *ent;
	HRDLNK *next;

	if (ltab == NULL)
		return;

	/*
	 * empty every chain; the table itself stays allocated for reuse
	 */
	for (slot = 0; slot < L_TAB_SZ; ++slot) {
		ent = ltab[slot];
		ltab[slot] = NULL;

		for (; ent != NULL; ent = next) {
			/* fetch the successor before the node goes away */
			next = ent->fow;
			free(ent->name);
			free(ent);
		}
	}
}

/*
 * modification time table routines
 *
 * The modification time table keeps track of last modification times for all
 * files stored in an archive during a write phase when -u is set. We only
 * add a file to the archive if it is newer than a file with the same name
 * already stored on the archive (if there is no other file with the same
 * name on the archive it is added). This applies to writes and appends.
 * An append with an -u must read the archive and store the modification time
 * for every file on that archive before starting the write phase. It is clear
 * that this is one HUGE database. To save memory space, the actual file names
 * are stored in a scratch file and indexed by an in-memory hash table. The
 * hash table is indexed by hashing the file path. The nodes in the table store
 * the length of the filename and the lseek offset within the scratch file
 * where the actual name is stored. Since there are never any deletions from
 * this table, fragmentation of the scratch file is never an issue. Lookups
 * seem to not exhibit any locality at all (files in the database are rarely
 * looked up more than once...), so caching is just a waste of memory. The
 * only limitation is the amount of scratch file space available to store the
 * path names.
 */

/*
 * ftime_start()
 *	create the file time hash table and open for read/write the scratch
 *	file. (after created it is unlinked, so when we exit we leave
 *	no witnesses).
+ * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +ftime_start(void) +{ + if (ftab != NULL) + return 0; + if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { + tty_warn(1, "Cannot allocate memory for file time table"); + return -1; + } + + /* + * get random name and create temporary scratch file, unlink name + * so it will get removed on exit + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((ffd = mkstemp(tempfile)) == -1) { + syswarn(1, errno, "Unable to create temporary file: %s", + tempfile); + return -1; + } + + (void)unlink(tempfile); + return 0; +} + +/* + * chk_ftime() + * looks up entry in file time hash table. If not found, the file is + * added to the hash table and the file named stored in the scratch file. + * If a file with the same name is found, the file times are compared and + * the most recent file time is retained. If the new file was younger (or + * was not in the database) the new file is selected for storage. + * Return: + * 0 if file should be added to the archive, 1 if it should be skipped, + * -1 on error + */ + +int +chk_ftime(ARCHD *arcn) +{ + FTM *pt; + int namelen; + u_int indx; + char ckname[PAXPATHLEN+1]; + + /* + * no info, go ahead and add to archive + */ + if (ftab == NULL) + return 0; + + /* + * hash the pathname and look up in table + */ + namelen = arcn->nlen; + indx = st_hash(arcn->name, namelen, F_TAB_SZ); + if ((pt = ftab[indx]) != NULL) { + /* + * the hash chain is not empty, walk down looking for match + * only read up the path names if the lengths match, speeds + * up the search a lot + */ + while (pt != NULL) { + if (pt->namelen == namelen) { + /* + * potential match, have to read the name + * from the scratch file. 
+ */ + if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { + syswarn(1, errno, + "Failed ftime table seek"); + return -1; + } + if (xread(ffd, ckname, namelen) != namelen) { + syswarn(1, errno, + "Failed ftime table read"); + return -1; + } + + /* + * if the names match, we are done + */ + if (!strncmp(ckname, arcn->name, namelen)) + break; + } + + /* + * try the next entry on the chain + */ + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found the file, compare the times, save the newer + */ + if (arcn->sb.st_mtime > pt->mtime) { + /* + * file is newer + */ + pt->mtime = arcn->sb.st_mtime; + return 0; + } + /* + * file is older + */ + return 1; + } + } + + /* + * not in table, add it + */ + if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) { + /* + * add the name at the end of the scratch file, saving the + * offset. add the file to the head of the hash chain + */ + if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) { + if (xwrite(ffd, arcn->name, namelen) == namelen) { + pt->mtime = arcn->sb.st_mtime; + pt->namelen = namelen; + pt->fow = ftab[indx]; + ftab[indx] = pt; + return 0; + } + syswarn(1, errno, "Failed write to file time table"); + } else + syswarn(1, errno, "Failed seek on file time table"); + } else + tty_warn(1, "File time table ran out of memory"); + + if (pt != NULL) + (void)free((char *)pt); + return -1; +} + +/* + * Interactive rename table routines + * + * The interactive rename table keeps track of the new names that the user + * assigns to files from tty input. Since this map is unique for each file + * we must store it in case there is a reference to the file later in archive + * (a link). Otherwise we will be unable to find the file we know was + * extracted. The remapping of these files is stored in a memory based hash + * table (it is assumed since input must come from /dev/tty, it is unlikely to + * be a very large table). 
+ */ + +/* + * name_start() + * create the interactive rename table + * Return: + * 0 if successful, -1 otherwise + */ + +int +name_start(void) +{ + if (ntab != NULL) + return 0; + if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { + tty_warn(1, + "Cannot allocate memory for interactive rename table"); + return -1; + } + return 0; +} + +/* + * add_name() + * add the new name to old name mapping just created by the user. + * If an old name mapping is found (there may be duplicate names on an + * archive) only the most recent is kept. + * Return: + * 0 if added, -1 otherwise + */ + +int +add_name(char *oname, int onamelen, char *nname) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) { + /* + * should never happen + */ + tty_warn(0, "No interactive rename table, links may fail\n"); + return 0; + } + + /* + * look to see if we have already mapped this file, if so we + * will update it + */ + indx = st_hash(oname, onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) != NULL) { + /* + * look down the has chain for the file + */ + while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) + pt = pt->fow; + + if (pt != NULL) { + /* + * found an old mapping, replace it with the new one + * the user just input (if it is different) + */ + if (strcmp(nname, pt->nname) == 0) + return 0; + + (void)free((char *)pt->nname); + if ((pt->nname = strdup(nname)) == NULL) { + tty_warn(1, "Cannot update rename table"); + return -1; + } + return 0; + } + } + + /* + * this is a new mapping, add it to the table + */ + if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) { + if ((pt->oname = strdup(oname)) != NULL) { + if ((pt->nname = strdup(nname)) != NULL) { + pt->fow = ntab[indx]; + ntab[indx] = pt; + return 0; + } + (void)free((char *)pt->oname); + } + (void)free((char *)pt); + } + tty_warn(1, "Interactive rename table out of memory"); + return -1; +} + +/* + * sub_name() + * look up a link name to see if it points at a file that has been + * remapped by the user. 
If found, the link is adjusted to contain the
 *	new name (oname is the link to name)
 */

void
sub_name(char *oname, int *onamelen, size_t onamesize)
{
	NAMT *ent;
	u_int slot;

	if (ntab == NULL)
		return;

	/*
	 * search the chain the old name hashes to
	 */
	slot = st_hash(oname, *onamelen, N_TAB_SZ);
	for (ent = ntab[slot]; ent != NULL; ent = ent->fow) {
		if (strcmp(oname, ent->oname) != 0)
			continue;

		/*
		 * the user renamed this file; overwrite the caller's buffer
		 * (at most onamesize bytes) with the new name and report the
		 * length strlcpy() returns
		 */
		*onamelen = strlcpy(oname, ent->nname, onamesize);
		return;
	}

	/*
	 * never renamed, the name is left as it was
	 */
}

/*
 * device/inode mapping table routines
 * (used with formats that store device and inodes fields)
 *
 * device/inode mapping tables remap the device field in an archive header. The
 * device/inode fields are used to determine when files are hard links to each
 * other. However these values have very little meaning outside of that. This
 * database is used to solve one of two different problems.
 *
 * 1) when files are appended to an archive, while the new files may have hard
 * links to each other, you cannot determine if they have hard links to any
 * file already stored on the archive from a prior run of pax. We must assume
 * that these inode/device pairs are unique only within a SINGLE run of pax
 * (which adds a set of files to an archive). So we have to make sure the
 * inode/dev pairs we add each time are always unique. We do this by observing
 * while the inode field is very dense, the use of the dev field is fairly
 * sparse. Within each run of pax, we remap any device number of a new archive
 * member that has a device number used in a prior run and already stored in a
 * file on the archive.
During the read phase of the append, we store the
 * device numbers used and mark them to not be used by any file during the
 * write phase. If during write we go to use one of those old device numbers,
 * we remap it to a new value.
 *
 * 2) Often the fields in the archive header used to store these values are
 * too small to store the entire value. The result is an inode or device value
 * which can be truncated. This really can foul up an archive. With truncation
 * we end up creating links between files that are really not links (after
 * truncation the inodes are the same value). We address that by detecting
 * truncation and forcing a remap of the device field to split truncated
 * inodes away from each other. Each truncation creates a pattern of bits that
 * are removed. We use this pattern of truncated bits to partition the inodes
 * on a single device to many different devices (each one represented by the
 * truncated bit pattern). All inodes on the same device that have the same
 * truncation pattern are mapped to the same new device. Two inodes that
 * truncate to the same value clearly will always have different truncation
 * bit patterns, so they will be split away from each other. When we spot
 * device truncation we remap the device number to a non truncated value.
 * (for more info see tables.h for the data structures involved).
 */

/*
 * dev_start()
 *	allocate the (zero filled) device mapping table unless one already
 *	exists
 * Return:
 *	0 if successful, -1 otherwise
 */

int
dev_start(void)
{
	DEVT **tbl;

	if (dtab != NULL)
		return 0;

	tbl = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *));
	if (tbl == NULL) {
		tty_warn(1, "Cannot allocate memory for device mapping table");
		return -1;
	}
	dtab = tbl;
	return 0;
}

/*
 * add_dev()
 *	add a device number to the table. this will force the device to be
 *	remapped to a new value if it be used during a write phase.
This + * function is called during the read phase of an append to prohibit the + * use of any device number already in the archive. + * Return: + * 0 if added ok, -1 otherwise + */ + +int +add_dev(ARCHD *arcn) +{ + if (chk_dev(arcn->sb.st_dev, 1) == NULL) + return -1; + return 0; +} + +/* + * chk_dev() + * check for a device value in the device table. If not found and the add + * flag is set, it is added. This does NOT assign any mapping values, just + * adds the device number as one that need to be remapped. If this device + * is already mapped, just return with a pointer to that entry. + * Return: + * pointer to the entry for this device in the device map table. Null + * if the add flag is not set and the device is not in the table (it is + * not been seen yet). If add is set and the device cannot be added, null + * is returned (indicates an error). + */ + +static DEVT * +chk_dev(dev_t dev, int add) +{ + DEVT *pt; + u_int indx; + + if (dtab == NULL) + return NULL; + /* + * look to see if this device is already in the table + */ + indx = ((unsigned)dev) % D_TAB_SZ; + if ((pt = dtab[indx]) != NULL) { + while ((pt != NULL) && (pt->dev != dev)) + pt = pt->fow; + + /* + * found it, return a pointer to it + */ + if (pt != NULL) + return pt; + } + + /* + * not in table, we add it only if told to as this may just be a check + * to see if a device number is being used. + */ + if (add == 0) + return NULL; + + /* + * allocate a node for this device and add it to the front of the hash + * chain. Note we do not assign remaps values here, so the pt->list + * list must be NULL. 
+ */ + if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) { + tty_warn(1, "Device map table out of memory"); + return NULL; + } + pt->dev = dev; + pt->list = NULL; + pt->fow = dtab[indx]; + dtab[indx] = pt; + return pt; +} +/* + * map_dev() + * given an inode and device storage mask (the mask has a 1 for each bit + * the archive format is able to store in a header), we check for inode + * and device truncation and remap the device as required. Device mapping + * can also occur when during the read phase of append a device number was + * seen (and was marked as do not use during the write phase). WE ASSUME + * that unsigned longs are the same size or bigger than the fields used + * for ino_t and dev_t. If not the types will have to be changed. + * Return: + * 0 if all ok, -1 otherwise. + */ + +int +map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) +{ + DEVT *pt; + DLIST *dpt; + static dev_t lastdev = 0; /* next device number to try */ + int trc_ino = 0; + int trc_dev = 0; + ino_t trunc_bits = 0; + ino_t nino; + + if (dtab == NULL) + return 0; + /* + * check for device and inode truncation, and extract the truncated + * bit pattern. + */ + if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) + ++trc_dev; + if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { + ++trc_ino; + trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); + } + + /* + * see if this device is already being mapped, look up the device + * then find the truncation bit pattern which applies + */ + if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { + /* + * this device is already marked to be remapped + */ + for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) + if (dpt->trunc_bits == trunc_bits) + break; + + if (dpt != NULL) { + /* + * we are being remapped for this device and pattern + * change the device number to be stored and return + */ + arcn->sb.st_dev = dpt->dev; + arcn->sb.st_ino = nino; + return 0; + } + } else { + /* + * this device is not being remapped YET. 
if we do not have any + * form of truncation, we do not need a remap + */ + if (!trc_ino && !trc_dev) + return 0; + + /* + * we have truncation, have to add this as a device to remap + */ + if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) + goto bad; + + /* + * if we just have a truncated inode, we have to make sure that + * all future inodes that do not truncate (they have the + * truncation pattern of all 0's) continue to map to the same + * device number. We probably have already written inodes with + * this device number to the archive with the truncation + * pattern of all 0's. So we add the mapping for all 0's to the + * same device number. + */ + if (!trc_dev && (trunc_bits != 0)) { + if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL) + goto bad; + dpt->trunc_bits = 0; + dpt->dev = arcn->sb.st_dev; + dpt->fow = pt->list; + pt->list = dpt; + } + } + + /* + * look for a device number not being used. We must watch for wrap + * around on lastdev (so we do not get stuck looking forever!) + */ + while (++lastdev > 0) { + if (chk_dev(lastdev, 0) != NULL) + continue; + /* + * found an unused value. If we have reached truncation point + * for this format we are hosed, so we give up. Otherwise we + * mark it as being used. + */ + if (((lastdev & ((dev_t)dev_mask)) != lastdev) || + (chk_dev(lastdev, 1) == NULL)) + goto bad; + break; + } + + if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)) + goto bad; + + /* + * got a new device number, store it under this truncation pattern. + * change the device number this file is being stored with. 
+ */ + dpt->trunc_bits = trunc_bits; + dpt->dev = lastdev; + dpt->fow = pt->list; + pt->list = dpt; + arcn->sb.st_dev = lastdev; + arcn->sb.st_ino = nino; + return 0; + + bad: + tty_warn(1, + "Unable to fix truncated inode/device field when storing %s", + arcn->name); + tty_warn(0, "Archive may create improper hard links when extracted"); + return 0; +} + +/* + * directory access/mod time reset table routines (for directories READ by pax) + * + * The pax -t flag requires that access times of archive files to be the same + * as before being read by pax. For regular files, access time is restored after + * the file has been copied. This database provides the same functionality for + * directories read during file tree traversal. Restoring directory access time + * is more complex than files since directories may be read several times until + * all the descendants in their subtree are visited by fts. Directory access + * and modification times are stored during the fts pre-order visit (done + * before any descendants in the subtree is visited) and restored after the + * fts post-order visit (after all the descendants have been visited). In the + * case of premature exit from a subtree (like from the effects of -n), any + * directory entries left in this database are reset during final cleanup + * operations of pax. Entries are hashed by inode number for fast lookup. + */ + +/* + * atdir_start() + * create the directory access time database for directories READ by pax. + * Return: + * 0 is created ok, -1 otherwise. + */ + +int +atdir_start(void) +{ + if (atab != NULL) + return 0; + if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { + tty_warn(1, + "Cannot allocate space for directory access time table"); + return -1; + } + return 0; +} + + +/* + * atdir_end() + * walk through the directory access time table and reset the access time + * of any directory who still has an entry left in the database. 
These + * entries are for directories READ by pax + */ + +void +atdir_end(void) +{ + ATDIR *pt; + int i; + + if (atab == NULL) + return; + /* + * for each non-empty hash table entry reset all the directories + * chained there. + */ + for (i = 0; i < A_TAB_SZ; ++i) { + if ((pt = atab[i]) == NULL) + continue; + /* + * remember to force the times, set_ftime() looks at pmtime + * and patime, which only applies to things CREATED by pax, + * not read by pax. Read time reset is controlled by -t. + */ + for (; pt != NULL; pt = pt->fow) + set_ftime(pt->name, pt->mtime, pt->atime, 1, 0); + } +} + +/* + * add_atdir() + * add a directory to the directory access time table. Table is hashed + * and chained by inode number. This is for directories READ by pax + */ + +void +add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime) +{ + ATDIR *pt; + u_int indx; + + if (atab == NULL) + return; + + /* + * make sure this directory is not already in the table, if so just + * return (the older entry always has the correct time). The only + * way this will happen is when the same subtree can be traversed by + * different args to pax and the -n option is aborting fts out of a + * subtree before all the post-order visits have been made. + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) != NULL) { + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + pt = pt->fow; + } + + /* + * oops, already there. Leave it alone. 
+ */ + if (pt != NULL) + return; + } + + /* + * add it to the front of the hash chain + */ + if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) { + if ((pt->name = strdup(fname)) != NULL) { + pt->dev = dev; + pt->ino = ino; + pt->mtime = mtime; + pt->atime = atime; + pt->fow = atab[indx]; + atab[indx] = pt; + return; + } + (void)free((char *)pt); + } + + tty_warn(1, "Directory access time reset table ran out of memory"); + return; +} + +/* + * get_atdir() + * look up a directory by inode and device number to obtain the access + * and modification time you want to set to. If found, the modification + * and access time parameters are set and the entry is removed from the + * table (as it is no longer needed). These are for directories READ by + * pax + * Return: + * 0 if found, -1 if not found. + */ + +int +get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime) +{ + ATDIR *pt; + ATDIR **ppt; + u_int indx; + + if (atab == NULL) + return -1; + /* + * hash by inode and search the chain for an inode and device match + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) == NULL) + return -1; + + ppt = &(atab[indx]); + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + /* + * no match, go to next one + */ + ppt = &(pt->fow); + pt = pt->fow; + } + + /* + * return if we did not find it. + */ + if (pt == NULL) + return -1; + + /* + * found it. return the times and remove the entry from the table. + */ + *ppt = pt->fow; + *mtime = pt->mtime; + *atime = pt->atime; + (void)free((char *)pt->name); + (void)free((char *)pt); + return 0; +} + +/* + * directory access mode and time storage routines (for directories CREATED + * by pax). + * + * Pax requires that extracted directories, by default, have their access/mod + * times and permissions set to the values specified in the archive. During the + * actions of extracting (and creating the destination subtree during -rw copy) + * directories extracted may be modified after being created. 
Even worse is + * that these directories may have been created with file permissions which + * prohibits any descendants of these directories from being extracted. When + * directories are created by pax, access rights may be added to permit the + * creation of files in their subtree. Every time pax creates a directory, the + * times and file permissions specified by the archive are stored. After all + * files have been extracted (or copied), these directories have their times + * and file modes reset to the stored values. The directory info is restored in + * reverse order as entries were added to the data file from root to leaf. To + * restore atime properly, we must go backwards. The data file consists of + * records with two parts, the file name followed by a DIRDATA trailer. The + * fixed sized trailer contains the size of the name plus the off_t location in + * the file. To restore we work backwards through the file reading the trailer + * then the file name. + */ + +#ifndef DIRS_USE_FILE +static DIRDATA *dirdata_head; +#endif + +/* + * dir_start() + * set up the directory time and file mode storage for directories CREATED + * by pax. + * Return: + * 0 if ok, -1 otherwise + */ + +int +dir_start(void) +{ +#ifdef DIRS_USE_FILE + if (dirfd != -1) + return 0; + + /* + * unlink the file so it goes away at termination by itself + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((dirfd = mkstemp(tempfile)) >= 0) { + (void)unlink(tempfile); + return 0; + } + tty_warn(1, "Unable to create temporary file for directory times: %s", + tempfile); + return -1; +#else + return (0); +#endif /* DIRS_USE_FILE */ +} + +/* + * add_dir() + * add the mode and times for a newly CREATED directory + * name is name of the directory, psb the stat buffer with the data in it, + * frc_mode is a flag that says whether to force the setting of the mode + * (ignoring the user set values for preserving file mode). 
Frc_mode is + * for the case where we created a file and found that the resulting + * directory was not writable and the user asked for file modes to NOT + * be preserved. (we have to preserve what was created by default, so we + * have to force the setting at the end. this is stated explicitly in the + * pax spec) + */ + +void +add_dir(char *name, int nlen, struct stat *psb, int frc_mode) +{ +#ifdef DIRS_USE_FILE + DIRDATA dblk; +#else + DIRDATA *dblk; +#endif + char realname[MAXPATHLEN], *rp; + + /* + * a relative name while havechd is set is replaced by its realpath() + * form (presumably so the entry stays valid after later directory + * changes - confirm against the havechd users) + */ + if (havechd && *name != '/') { + if ((rp = realpath(name, realname)) == NULL) { + tty_warn(1, "Cannot canonicalize %s", name); + return; + } + name = rp; +#ifdef DIRS_USE_FILE + /* the caller's nlen described the old name */ + nlen = strlen(name); +#endif + } + +#ifdef DIRS_USE_FILE + if (dirfd < 0) + return; + + /* + * get current position (where file name will start) so we can store it + * in the trailer + */ + if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) { + tty_warn(1, + "Unable to store mode and times for directory: %s",name); + return; + } + + /* + * write the file name followed by the trailer; nlen + 1 so that the + * terminating NUL of the name is stored as well + */ + dblk.nlen = nlen + 1; + dblk.mode = psb->st_mode & 0xffff; + dblk.mtime = psb->st_mtime; + dblk.atime = psb->st_atime; +#if HAVE_STRUCT_STAT_ST_FLAGS + dblk.fflags = psb->st_flags; +#else + dblk.fflags = 0; +#endif + dblk.frc_mode = frc_mode; + if ((xwrite(dirfd, name, dblk.nlen) == dblk.nlen) && + (xwrite(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) { + ++dircnt; + return; + } + + tty_warn(1, + "Unable to store mode and times for created directory: %s",name); + return; +#else + + /* + * in-memory variant: keep a private copy of the name in a new list + * entry (nlen is not used here) + */ + if ((dblk = malloc(sizeof(*dblk))) == NULL || + (dblk->name = strdup(name)) == NULL) { + tty_warn(1, + "Unable to store mode and times for directory: %s",name); + if (dblk != NULL) + free(dblk); + return; + } + + dblk->mode = psb->st_mode & 0xffff; + dblk->mtime = psb->st_mtime; + dblk->atime = psb->st_atime; +#if HAVE_STRUCT_STAT_ST_FLAGS + dblk->fflags = psb->st_flags; +#else + dblk->fflags = 0; +#endif + dblk->frc_mode =
frc_mode; + + /* + * push on the front of the list so that proc_dir() handles the most + * recently added directory first + */ + dblk->next = dirdata_head; + dirdata_head = dblk; + return; +#endif /* DIRS_USE_FILE */ +} + +/* + * proc_dir() + * process all file modes and times stored for directories CREATED + * by pax, most recently stored entry first. The stored data is consumed: + * the temporary file is closed, or every list entry is freed. + */ + +void +proc_dir(void) +{ +#ifdef DIRS_USE_FILE + char name[PAXPATHLEN+1]; + DIRDATA dblk; + u_long cnt; + + if (dirfd < 0) + return; + /* + * read backwards through the file and process each directory + */ + for (cnt = 0; cnt < dircnt; ++cnt) { + /* + * read the trailer, then the file name, if this fails + * just give up. + * NOTE(review): dblk.nlen is not checked against sizeof(name) + * before the name is read - confirm add_dir() can never store + * a longer name. + */ + if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0) + break; + if (xread(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk)) + break; + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + if (xread(dirfd, name, dblk.nlen) != dblk.nlen) + break; + /* + * go back to the start of this record, the trailer of the + * previously stored record ends right here + */ + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + if (pmode || dblk.frc_mode) + set_pmode(name, dblk.mode); + if (patime || pmtime) + set_ftime(name, dblk.mtime, dblk.atime, 0, 0); + if (pfflags) + set_chflags(name, dblk.fflags); + } + + (void)close(dirfd); + dirfd = -1; + /* the loop above gave up early on a seek or read failure */ + if (cnt != dircnt) + tty_warn(1, + "Unable to set mode and times for created directories"); + return; +#else + DIRDATA *dblk; + + /* + * pop entries off the front of the list (newest first) and free each + * one once it has been applied + */ + for (dblk = dirdata_head; dblk != NULL; dblk = dirdata_head) { + dirdata_head = dblk->next; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + if (pmode || dblk->frc_mode) + set_pmode(dblk->name, dblk->mode); + if (patime || pmtime) + set_ftime(dblk->name, dblk->mtime, dblk->atime, 0, 0); + if (pfflags) + set_chflags(dblk->name, dblk->fflags); + + free(dblk->name); + free(dblk); + } +#endif /* DIRS_USE_FILE */ +} + +/* + * database independent routines + */ + +/* + * st_hash() + * hashes filenames to a u_int for hashing into a table.
Looks at the tail + * end of file, as this provides far better distribution than any other + * part of the name. For performance reasons we only care about the last + * MAXKEYLEN chars (should be at LEAST large enough to pick off the file + * name). Was tested on 500,000 name file tree traversal from the root + * and gave almost a perfectly uniform distribution of keys when used with + * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) + * chars at a time and pads with 0 for last addition. + * Return: + * the hash value of the string MOD (%) the table size. + */ + +u_int +st_hash(char *name, int len, int tabsz) +{ + char *pt; + char *dest; + char *end; + int i; + u_int key = 0; + int steps; + int res; + u_int val; + + /* + * only look at the tail up to MAXKEYLEN, we do not need to waste + * time here (remember these are pathnames, the tail is what will + * spread out the keys) + */ + if (len > MAXKEYLEN) { + pt = &(name[len - MAXKEYLEN]); + len = MAXKEYLEN; + } else + pt = name; + + /* + * calculate the number of u_int size steps in the string and if + * there is a runt to deal with (res is the number of leftover bytes) + */ + steps = len/sizeof(u_int); + res = len % sizeof(u_int); + + /* + * add up the value of the string in unsigned integer sized pieces + * too bad we cannot have unsigned int aligned strings, then we + * could avoid the expensive copy.
+ */ + for (i = 0; i < steps; ++i) { + end = pt + sizeof(u_int); + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * add in the runt padded with zero to the right + * (val is filled byte by byte in memory order, as in the loop above, + * so the key computed for a given name depends on the host byte order) + */ + if (res) { + val = 0; + end = pt + res; + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * return the result mod the table size (tabsz must not be 0) + */ + return key % tabsz; +} diff --git a/bin/pax/tables.h b/bin/pax/tables.h new file mode 100644 index 0000000..1038589 --- /dev/null +++ b/bin/pax/tables.h @@ -0,0 +1,176 @@ +/* $NetBSD: tables.h,v 1.10 2007/04/29 20:23:34 msaitoh Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tables.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * data structures and constants used by the different databases kept by pax + */ + +/* + * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. + * Probably safe to expect 500000 inodes per tape. Assuming good key + * distribution (inodes) chains of under 50 long (worst case) is ok. + */ +#define L_TAB_SZ 2503 /* hard link hash table size */ +#define F_TAB_SZ 50503 /* file time hash table size */ +#define N_TAB_SZ 541 /* interactive rename hash table */ +#define D_TAB_SZ 317 /* unique device mapping table */ +#define A_TAB_SZ 317 /* ftree dir access time reset table */ +#define MAXKEYLEN 64 /* max number of chars for hash */ + +/* + * file hard link structure (hashed by dev/ino and chained) used to find the + * hard links in a file system or with some archive formats (cpio) + */ +typedef struct hrdlnk { + char *name; /* name of first file seen with this ino/dev */ + dev_t dev; /* files device number */ + ino_t ino; /* files inode number */ + u_long nlink; /* expected link count */ + struct hrdlnk *fow; /* next entry on the same hash chain */ +} HRDLNK; + +/* + * Archive write update file time table (the -u, -C flag), hashed by filename. + * Filenames are stored in a scratch file at seek offset into the file. The + * file time (mod time) and the file name length (for a quick check) are + * stored in a hash table node.
We were forced to use a scratch file because + * with -u, the mtime for every node in the archive must always be available + * to compare against (and this data can get REALLY large with big archives). + * By being careful to read only when we have a good chance of a match, the + * performance loss is not measurable (and the size of the archive we can + * handle is greatly increased). + */ +typedef struct ftm { + int namelen; /* file name length */ + time_t mtime; /* files last modification time */ + off_t seek; /* location in scratch file */ + struct ftm *fow; +} FTM; + +/* + * Interactive rename table (-i flag), hashed by orig filename. + * We assume this will not be a large table as this mapping data can only be + * obtained through interactive input by the user. Nobody is going to type in + * changes for 500000 files? We use chaining to resolve collisions. + */ + +typedef struct namt { + char *oname; /* old name */ + char *nname; /* new name typed in by the user */ + struct namt *fow; +} NAMT; + +/* + * Unique device mapping tables. Some protocols (e.g. cpio) require that the + * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they + * are links to the same file. Appending to archives can break this. For those + * protocols that have this requirement we map c_dev to a unique value not seen + * in the archive when we append. We also try to handle inode truncation with + * this table. (When the inode field in the archive header is too small, we + * remap the dev on writes to remove accidental collisions). + * + * The list is hashed by device number using chain collision resolution. Off of + * each DEVT are linked the various remaps for this device based on those bits + * in the inode which were truncated. For example if we are just remapping to + * avoid a device number during an update append, off the DEVT we would have + * only a single DLIST that has a truncation id of 0 (no inode bits were + * stripped for this device so far).
When we spot inode truncation we create + * a new mapping based on the set of bits in the inode which were stripped off. + * So if the top four bits of the inode are stripped and they have a pattern of + * 0110...... (where . are those bits not truncated) we would have a mapping + * assigned for all inodes that have the same 0110.... pattern (with this dev + * number of course). This keeps the mapping sparse and should be able to store + * close to the limit of files which can be represented by the optimal + * combination of dev and inode bits, and without creating a fouled up archive. + * Note we also remap truncated devs in the same way (an exercise for the + * dedicated reader; always wanted to say that...:) + */ + +typedef struct devt { + dev_t dev; /* the orig device number we now have to map */ + struct devt *fow; /* new device map list */ + struct dlist *list; /* map list based on inode truncation bits */ +} DEVT; + +typedef struct dlist { + ino_t trunc_bits; /* truncation pattern for a specific map */ + dev_t dev; /* the new device id we use */ + struct dlist *fow; +} DLIST; + +/* + * ftree directory access time reset table. When we are done with a + * subtree we reset the access and mod time of the directory when the tflag is + * set. Not really explicitly specified in the pax spec, but easy and fast to + * do (and this may have even been intended in the spec, it is not clear). + * table is hashed by inode with chaining. + */ + +typedef struct atdir { + char *name; /* name of directory to reset */ + dev_t dev; /* dev and inode for fast lookup */ + ino_t ino; + time_t mtime; /* access and mod time to reset to */ + time_t atime; + struct atdir *fow; +} ATDIR; + +/* + * created directory time and mode storage entry. After pax is finished during + * extraction or copy, we must reset directory access modes and times that + * may have been modified after creation (they no longer have the specified + * times and/or modes).
We must reset time in the reverse order of creation, + * because entries are added from the top of the file tree to the bottom. + * We MUST reset times from leaf to root (it will not work the other + * direction). Entries are recorded into a spool file to make reverse + * reading faster. + */ + +typedef struct dirdata { +#ifdef DIRS_USE_FILE + int nlen; /* length of the directory name (includes \0) */ + off_t npos; /* position in file where this dir name starts */ +#else + char *name; /* file name */ + struct dirdata *next; /* next entry on the in-memory list */ +#endif + mode_t mode; /* file mode to restore */ + time_t mtime; /* mtime to set */ + time_t atime; /* atime to set */ + long fflags; /* file flags to set */ + int frc_mode; /* do we force mode settings? */ +} DIRDATA; diff --git a/bin/pax/tar.1 b/bin/pax/tar.1 new file mode 100644 index 0000000..f98a138 --- /dev/null +++ b/bin/pax/tar.1 @@ -0,0 +1,372 @@ +.\" $NetBSD: tar.1,v 1.37 2017/07/03 21:33:23 wiz Exp $ +.\" +.\" Copyright (c) 1996 SigmaSoft, Th. Lockert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" OpenBSD: tar.1,v 1.28 2000/11/09 23:58:56 aaron Exp +.\" +.Dd December 19, 2015 +.Dt TAR 1 +.Os +.Sh NAME +.Nm tar +.Nd tape archiver +.Sh SYNOPSIS +.Nm tar +.Sm off +.Oo \&- Oc {crtux} Op Fl 014578befHhJjklmOoPpqSvwXZz +.Sm on +.Op Ar archive +.Op Ar blocksize +.\" XXX how to do this right? +.Op Fl C Ar directory +.Op Fl s Ar replstr +.Op Fl T Ar file +.Op Ar file ... +.Sh DESCRIPTION +The +.Nm +command creates, adds files to, or extracts files from an +archive file in +.Dq tar +format. +A tar archive is often stored on a magnetic tape, but can be +stored equally well on a floppy, CD-ROM, or in a regular disk file. +.Pp +One of the following flags must be present: +.Bl -tag -width Ar +.It Fl c , Fl Fl create +Create new archive, or overwrite an existing archive, +adding the specified files to it. +.It Fl r , Fl Fl append +Append the named new files to existing archive. +Note that this will only work on media on which an end-of-file mark +can be overwritten. +.It Fl t , Fl Fl list +List contents of archive. +If any files are named on the +command line, only those files will be listed. +.It Fl u , Fl Fl update +Alias for +.Fl r . +.It Fl x , Fl Fl extract , Fl Fl get +Extract files from archive. +If any files are named on the +command line, only those files will be extracted from the +archive. +If more than one copy of a file exists in the +archive, later copies will overwrite earlier copies during +extraction. 
+The file mode and modification time are preserved +if possible. +The file mode is subject to modification by the +.Xr umask 2 . +.El +.Pp +In addition to the flags mentioned above, any of the following +flags may be used: +.Bl -tag -width Ar +.It Fl b Ar "blocking factor" , Fl Fl block-size Ar "blocking factor" +Set blocking factor to use for the archive. +.Nm +uses 512 byte blocks. +The default is 20, the maximum is 126. +Archives with a blocking factor larger than 63 violate the +.Tn POSIX +standard and will not be portable to all systems. +.It Fl e +Stop after first error. +.It Fl f Ar archive , Fl Fl file Ar archive +Filename where the archive is stored. +Defaults to +.Pa /dev/rst0 . +If the archive is of the form: +.Ar [[user@]host:]file +then the archive will be processed using +.Xr rmt 8 . +.It Fl h , Fl Fl dereference +Follow symbolic links as if they were normal files +or directories. +.It Fl J , Fl Fl xz +Compress/decompress archive using +.Xr xz 1 . +.It Fl j , Fl Fl bzip2 , Fl Fl bunzip2 +Use +.Xr bzip2 1 +for compression of the archive. +This option is a GNU extension. +.It Fl k , Fl Fl keep-old-files +Keep existing files; don't overwrite them from archive. +.It Fl l , Fl Fl one-file-system +Do not descend across mount points. +.\" should be '-X' +.It Fl m , Fl Fl modification-time +Do not preserve modification time. +.It Fl O +When creating and appending to an archive, write old-style (non-POSIX) archives. +When extracting from an archive, extract to standard output. +.It Fl o , Fl Fl portability , Fl Fl old-archive +Don't write directory information that the older (V7) style +.Nm +is unable to decode. +This implies the +.Fl O +flag. +.It Fl p , Fl Fl preserve-permissions , Fl Fl preserve +Preserve user and group ID as well as file mode regardless of +the current +.Xr umask 2 . +The setuid and setgid bits are only preserved if the user is +the superuser. +Only meaningful in conjunction with the +.Fl x +flag.
+.It Fl q , Fl Fl fast-read +Select the first archive member that matches each +.Ar pattern +operand. +No more than one archive member is matched for each +.Ar pattern . +When members of type directory are matched, the file hierarchy rooted at that +directory is also matched. +.It Fl S , Fl Fl sparse +This flag has no effect as +.Nm +always generates sparse files. +.It Fl s Ar replstr +Modify the file or archive member names specified by the +.Ar pattern +or +.Ar file +operands according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +The format of these regular expressions is: +.Dl /old/new/[gps] +As in +.Xr ed 1 , +.Cm old +is a basic regular expression and +.Cm new +can contain an ampersand (&), \en (where n is a digit) back-references, +or subexpression matching. +The +.Cm old +string may also contain +.Aq Dv newline +characters. +Any non-null character can be used as a delimiter (/ is shown here). +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +.Dv standard error +in the following format: +.Dl <original pathname> >> <new pathname> +File or archive member names that substitute to the empty string +are not selected and will be skipped. +The substitutions are applied by default to the destination hard and symbolic +links. +The optional trailing +.Cm s +prevents the substitutions from being performed on symbolic link destinations. +.It Fl v +Verbose operation mode.
+.It Fl w , Fl Fl interactive , Fl Fl confirmation +Interactively rename files. +This option causes +.Nm +to prompt the user for the filename to use when storing or +extracting files in an archive. +.It Fl z , Fl Fl gzip , Fl Fl gunzip +Compress/decompress archive using +.Xr gzip 1 . +.It Fl B , Fl Fl read-full-blocks +Reassemble small reads into full blocks (For reading from 4.2BSD pipes). +.It Fl C Ar directory , Fl Fl directory Ar directory +This is a positional argument which sets the working directory for the +following files. +When extracting, files will be extracted into +the specified directory; when creating, the specified files will be matched +from the directory. +This argument and its parameter may also appear in a file list specified by +.Fl T . +.It Fl H +Only follow symlinks given on command line. +.Pp +Note SysVr3/i386 picked up ISC/SCO UNIX compatibility which implemented +.Dq Fl F Ar file +which was defined as obtaining a list of command line switches and files +on which to operate from the specified file, +but SunOS-5 uses +.Dq Fl I Ar file +because they use +.Sq Fl F +to mean something else. +We might someday provide SunOS-5 compatibility +but it makes little sense to confuse things with ISC/SCO compatibility. +.\".It Fl L +.\"Do not follow any symlinks (do the opposite of +.\".Fl h ). +.It Fl P , Fl Fl absolute-paths +Do not strip leading slashes +.Pq Sq / +from pathnames. +The default is to strip leading slashes. +.It Fl T Ar file , Fl Fl files-from Ar file +Read the names of files to archive or extract from the given file, one +per line. +A line may also specify the positional argument +.Dq Fl C Ar directory . +.It Fl X Ar file , Fl Fl exclude-from Ar file +Exclude files matching the shell glob patterns listed in the given file. 
+.\" exclude should be '-E' and '-X' should be one-file-system +.Pp +Note that it would be more standard to use this option to mean ``do not +cross filesystem mount points.'' +.It Fl Z , Fl Fl compress , Fl Fl uncompress +Compress archive using compress. +.It Fl Fl strict +Do not enable GNU tar extensions such as long filenames and long link names. +.It Fl Fl atime-preserve +Preserve file access times. +.It Fl Fl chroot +.Fn chroot +to the current directory before extracting files. +Use with +.Fl x +and +.Fl h +to make absolute symlinks relative to the current directory. +.It Fl Fl unlink +Ignored, only accepted for compatibility with other +.Nm +implementations. +.Nm +always unlinks files before creating them. +.It Fl Fl use-compress-program Ar program +Use the named program as the program to decompress the input. +.It Fl Fl force-local +Do not interpret filenames that contain a +.Sq \&: +as remote files. +.It Fl Fl insecure +Normally +.Nm +ignores filenames that contain +.Dq .. +as a path component. +With this option, files that contain +.Dq .. +can be processed. +.It Fl Fl no-recursion +Cause files of type directory being copied or archived, or archive members of +type directory being extracted, to match only the directory file or archive +member and not the file hierarchy rooted at the directory. +.It Fl Fl timestamp Ar timestamp +Store all modification times in the archive with the +.Ar timestamp +given instead of the actual modification time of the individual archive member +so that repeatable builds are possible. +The +.Ar timestamp +can be a +.Pa pathname , +where the timestamps are derived from that file, a parseable date for +.Xr parsedate 3 +(this option is not yet available in the tools build), or an integer value +interpreted as the number of seconds from the Epoch. +.El +.Pp +The options +.Op Fl 014578 +can be used to select one of the compiled-in backup devices, +.Pa /dev/rstN . 
+.Sh FILES +.Bl -tag -width "/dev/rst0" +.It Pa /dev/rst0 +default archive name +.El +.Sh DIAGNOSTICS +.Nm +will exit with one of the following values: +.Bl -tag -width 2n +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Pp +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive which may violate the +specific archive format specification. +.Sh SEE ALSO +.Xr cpio 1 , +.Xr pax 1 +.Sh HISTORY +A +.Nm +command first appeared in +.At v7 . +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. diff --git a/bin/pax/tar.c b/bin/pax/tar.c new file mode 100644 index 0000000..ded2ad4 --- /dev/null +++ b/bin/pax/tar.c @@ -0,0 +1,1430 @@ +/* $NetBSD: tar.c,v 1.74 2018/11/30 00:53:11 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include <sys/cdefs.h> +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: tar.c,v 1.74 2018/11/30 00:53:11 christos Exp $"); +#endif +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/param.h> + +#include <ctype.h> +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" +#include "tar.h" + +extern struct stat tst; + +/* + * Routines for reading, writing and header identify of various versions of tar + */ + +static int expandname(char *, size_t, char **, size_t *, const char *, size_t); +static void longlink(ARCHD *, int); +static uint32_t tar_chksm(char *, int); +static char *name_split(char *, int); +static int u32_oct(uintmax_t, char *, int, int); +static int umax_oct(uintmax_t, char *, int, int); +static int tar_gnutar_exclude_one(const char *, size_t); +static int check_sum(char *, size_t, char *, size_t, int); + +/* + * Routines common to all versions of tar + */ + +static int tar_nodir; /* do not write dirs under old tar */ +int is_gnutar; /* behave like gnu tar; enable gnu + * extensions and skip end-of-volume + * checks + */ +static int seen_gnu_warning; /* Have we warned yet?
*/ +static char *gnu_hack_string; /* ././@LongLink hackery */ +static int gnu_hack_len; /* len of gnu_hack_string */ +char *gnu_name_string; /* ././@LongLink hackery name */ +char *gnu_link_string; /* ././@LongLink hackery link */ +size_t gnu_name_length; /* ././@LongLink hackery name */ +size_t gnu_link_length; /* ././@LongLink hackery link */ +static int gnu_short_trailer; /* set by tar_trail(): one block gnu trailer */ + +static const char LONG_LINK[] = "././@LongLink"; + +#ifdef _PAX_ +char DEV_0[] = "/dev/rst0"; +char DEV_1[] = "/dev/rst1"; +char DEV_4[] = "/dev/rst4"; +char DEV_5[] = "/dev/rst5"; +char DEV_7[] = "/dev/rst7"; +char DEV_8[] = "/dev/rst8"; +#endif + +/* + * check_sum() + * compare the checksum stored in a header field (hd, hdlen; parsed with + * asc_u32(..., OCT)) with the one tar_chksm() computes over the first + * bllen bytes of the block bl. A mismatch is reported with tty_warn() + * unless quiet is non-zero. + * Return: + * 0 if the two checksums are equal, -1 otherwise + */ + +static int +check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) +{ + uint32_t hdck, blck; + + hdck = asc_u32(hd, hdlen, OCT); + blck = tar_chksm(bl, bllen); + + if (hdck != blck) { + if (!quiet) + tty_warn(0, "Header checksum %o does not match %o", + hdck, blck); + return -1; + } + return 0; +} + + +/* + * tar_endwr() + * add the tar trailer of two null blocks + * Return: + * 0 if ok, -1 otherwise (what wr_skip returns) + */ + +int +tar_endwr(void) +{ + return wr_skip((off_t)(NULLCNT * BLKMULT)); +} + +/* + * tar_endrd() + * no cleanup needed here, just return size of trailer (for append) + * Return: + * size of the trailer: NULLCNT blocks of BLKMULT bytes, or a single + * block when tar_trail() set gnu_short_trailer + */ + +off_t +tar_endrd(void) +{ + return (off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT); +} + +/* + * tar_trail() + * Called to determine if a header block is a valid trailer. We are passed + * the block, the in_resync flag (which tells us we are in resync mode; + * looking for a valid header), and cnt (which starts at zero) which is + * used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block + * could never contain a header.
+ */ + +int +tar_trail(char *buf, int in_resync, int *cnt) +{ + int i; + + /* cleared on every call, set again below for a gnu zero block */ + gnu_short_trailer = 0; + /* + * look for all zero, trailer is two consecutive blocks of zero + */ + for (i = 0; i < BLKMULT; ++i) { + if (buf[i] != '\0') + break; + } + + /* + * if not all zero it is not a trailer, but MIGHT be a header. + */ + if (i != BLKMULT) + return -1; + + /* + * When given a zero block, we must be careful! + * If we are not in resync mode, check for the trailer. Have to watch + * out that we do not mis-identify file data as the trailer, so we do + * NOT try to id a trailer during resync mode. During resync mode we + * might as well throw this block out since a valid header can NEVER be + * a block of all 0 (we must have a valid file name). + */ + if (!in_resync) { + ++*cnt; + /* + * old GNU tar (up through 1.13) only writes one block of + * trailers, so we pretend we got another + * NOTE(review): with is_gnutar set this marks every zero block + * as a short trailer, so tar_endrd() reports one block even if + * the archive really ends in two - confirm intended. + */ + if (is_gnutar) { + gnu_short_trailer = 1; + ++*cnt; + } + if (*cnt >= NULLCNT) + return 0; + } + return 1; +} + +/* + * u32_oct() + * convert an uintmax_t to an octal string. many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is '0' padded + * at the front to len. we are unable to use only one format as many old + * tar readers are very cranky about this.
+ * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +u32_oct(uintmax_t val, char *str, int len, int term) +{ + char *pt; + uint64_t p; + + /* + * the bits selected by TOP_HALF must be all clear or all set, any + * other value is rejected; only the BOTTOM_HALF bits are converted + */ + p = val & TOP_HALF; + if (p && p != TOP_HALF) + return -1; + + val &= BOTTOM_HALF; + + /* + * term selects the appropriate character(s) for the end of the string + * (3: NUL, 2: NUL then space, 1: space, 0 or anything else: space + * then NUL); they are stored from the last byte of str backwards + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert to octal digits, least significant digit first, then pad + * whatever room is left at the front with '0' + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == 0) + break; + } + + while (pt >= str) + *pt-- = '0'; + /* digits were left over, the value did not fit into len */ + if (val != 0) + return -1; + return 0; +} + +/* + * umax_oct() + * convert a uintmax_t to an octal string. one of many oddball + * field termination characters are used by the various versions of tar + * in the different fields. term selects which kind to use. str is '0' + * padded at the front to len. we are unable to use only one format as + * many old tar readers are very cranky about this.
+ * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +umax_oct(uintmax_t val, char *str, int len, int term) +{ + char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + * (3: NUL, 2: NUL then space, 1: space, 0 or anything else: space + * then NUL); they are stored from the last byte of str backwards + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert to octal digits, least significant digit first, then pad + * whatever room is left at the front with '0' + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == 0) + break; + } + + while (pt >= str) + *pt-- = '0'; + /* digits were left over, the value did not fit into len */ + if (val != 0) + return -1; + return 0; +} + +/* + * tar_chksm() + * calculate the checksum for a tar block counting the checksum field as + * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). + * NOTE: we use len to short circuit summing 0's on write since we ALWAYS + * pad headers with 0. + * Return: + * the checksum as a uint32_t + */ + +static uint32_t +tar_chksm(char *blk, int len) +{ + char *stop; + char *pt; + uint32_t chksm = BLNKSUM; /* initial value is checksum field sum */ + + /* + * add the part of the block before the checksum field + * (each byte is masked with 0xff so it is summed as an unsigned value) + */ + pt = blk; + stop = blk + CHK_OFFSET; + while (pt < stop) + chksm += (uint32_t)(*pt++ & 0xff); + /* + * move past the checksum field and keep going, spec counts the + * checksum field as the sum of 8 blanks (which is pre-computed as + * BLNKSUM). + * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding + * starts, no point in summing zero's) + */ + pt += CHK_LEN; + stop = blk + len; + while (pt < stop) + chksm += (uint32_t)(*pt++ & 0xff); + return chksm; +} + +/* + * Routines for old BSD style tar (also made portable to sysV tar) + */ + +/* + * tar_id() + * determine if a block given to us is a valid tar header (and not a USTAR + * header). We have to be on the lookout for those pesky blocks of all + * zero's.
+ * Return: + * 0 if a tar header, -1 otherwise + */ + +int +tar_id(char *blk, int size) +{ + HD_TAR *hd; + HD_USTAR *uhd; + static int is_ustar = -1; + + if (size < BLKMULT) + return -1; + hd = (HD_TAR *)blk; + uhd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test, then make + * sure this is not a ustar header by looking for the ustar magic + * cookie. We should use TMAGLEN, but some USTAR archive programs are + * wrong and create archives missing the \0. Last we check the + * checksum. If this is ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return -1; + if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) { + if (is_ustar == -1) { + is_ustar = 1; + return -1; + } else + tty_warn(0, + "Busted tar archive: has both ustar and old tar " + "records"); + } else + is_ustar = 0; + return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); +} + +/* + * tar_opt() + * handle tar format specific -o options + * Return: + * 0 if ok -1 otherwise + */ + +int +tar_opt(void) +{ + OPLIST *opt; + + while ((opt = opt_next()) != NULL) { + if (strcmp(opt->name, TAR_OPTION) || + strcmp(opt->value, TAR_NODIR)) { + tty_warn(1, + "Unknown tar format -o option/value pair %s=%s", + opt->name, opt->value); + tty_warn(1, + "%s=%s is the only supported tar format option", + TAR_OPTION, TAR_NODIR); + return -1; + } + + /* + * we only support one option, and only when writing + */ + if ((act != APPND) && (act != ARCHIVE)) { + tty_warn(1, "%s=%s is only supported when writing.", + opt->name, opt->value); + return -1; + } + tar_nodir = 1; + } + return 0; +} + + +/* + * tar_rd() + * extract the values out of block already determined to be a tar header. + * store the values in the ARCHD parameter. 
+ * Return: + * 0 + */ + +int +tar_rd(ARCHD *arcn, char *buf) +{ + HD_TAR *hd; + char *pt; + + /* + * we only get proper sized buffers passed to us + */ + if (tar_id(buf, BLKMULT) < 0) + return -1; + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->pat = NULL; + arcn->sb.st_nlink = 1; + + /* + * copy out the name and values in the stat buffer + */ + hd = (HD_TAR *)buf; + if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { + arcn->nlen = expandname(arcn->name, sizeof(arcn->name), + &gnu_name_string, &gnu_name_length, hd->name, + sizeof(hd->name)); + arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), + &gnu_link_string, &gnu_link_length, hd->linkname, + sizeof(hd->linkname)); + } + arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode,sizeof(hd->mode),OCT) & + 0xfff); + arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); + arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); + arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); + if (arcn->sb.st_size == -1) + return -1; + arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * have to look at the last character, it may be a '/' and that is used + * to encode this as a directory + */ + pt = &(arcn->name[arcn->nlen - 1]); + arcn->pad = 0; + arcn->skip = 0; + switch(hd->linkflag) { + case SYMTYPE: + /* + * symbolic link, need to get the link name and set the type in + * the st_mode so -v printing will look correct. + */ + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + break; + case LNKTYPE: + /* + * hard link, need to get the link name, set the type in the + * st_mode and st_nlink so -v printing will look better. + */ + arcn->type = PAX_HLK; + arcn->sb.st_nlink = 2; + + /* + * no idea of what type this thing really points at, but + * we set something for printing only. 
+ */ + arcn->sb.st_mode |= S_IFREG; + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + if (hd->linkflag != LONGLINKTYPE) + arcn->type = PAX_GLF; + else + arcn->type = PAX_GLL; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + break; + case AREGTYPE: + case REGTYPE: + case DIRTYPE: /* see below */ + default: + /* + * If we have a trailing / this is a directory and NOT a file. + * Note: V7 tar doesn't actually have DIRTYPE, but it was + * reported that V7 archives using USTAR directories do exist. + */ + if (*pt == '/' || hd->linkflag == DIRTYPE) { + /* + * it is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + } else { + /* + * have a file that will be followed by data. Set the + * skip value to the size field and calculate the size + * of the padding. + */ + arcn->type = PAX_REG; + arcn->sb.st_mode |= S_IFREG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } + break; + } + + /* + * strip off any trailing slash. + */ + if (*pt == '/') { + *pt = '\0'; + --arcn->nlen; + } + return 0; +} + +/* + * tar_wr() + * write a tar header for the file specified in the ARCHD to the archive. + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to u32_oct, each field + * of tar has it own spec for the termination character(s). 
+ * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +tar_wr(ARCHD *arcn) +{ + HD_TAR *hd; + int len; + uintmax_t mtime; + char hdblk[sizeof(HD_TAR)]; + + /* + * check for those file system types which tar cannot store + */ + switch(arcn->type) { + case PAX_DIR: + /* + * user asked that dirs not be written to the archive + */ + if (tar_nodir) + return 1; + break; + case PAX_CHR: + tty_warn(1, "Tar cannot archive a character device %s", + arcn->org_name); + return 1; + case PAX_BLK: + tty_warn(1, + "Tar cannot archive a block device %s", arcn->org_name); + return 1; + case PAX_SCK: + tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); + return 1; + case PAX_FIF: + tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); + return 1; + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->ln_nlen > (int)sizeof(hd->linkname)) { + tty_warn(1,"Link name too long for tar %s", + arcn->ln_name); + return 1; + } + break; + case PAX_REG: + case PAX_CTG: + default: + break; + } + + /* + * check file name len, remember extra char for dirs (the / at the end) + */ + len = arcn->nlen; + if (arcn->type == PAX_DIR) + ++len; + if (len >= (int)sizeof(hd->name)) { + tty_warn(1, "File name too long for tar %s", arcn->name); + return 1; + } + + /* + * copy the data out of the ARCHD into the tar header based on the type + * of the file. Remember many tar readers want the unused fields to be + * padded with zero. 
We set the linkflag field (type), the linkname + * (or zero if not used),the size, and set the padding (if any) to be + * added after the file data (0 for all other types, as they only have + * a header) + */ + memset(hdblk, 0, sizeof(hdblk)); + hd = (HD_TAR *)hdblk; + strlcpy(hd->name, arcn->name, sizeof(hd->name)); + arcn->pad = 0; + + if (arcn->type == PAX_DIR) { + /* + * directories are the same as files, except have a filename + * that ends with a /, we add the slash here. No data follows, + * dirs, so no pad. + */ + hd->linkflag = AREGTYPE; + hd->name[len-1] = '/'; + if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (arcn->type == PAX_SLK) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = SYMTYPE; + strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); + if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = LNKTYPE; + strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); + if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else { + /* + * data follows this file, so set the pad + */ + hd->linkflag = AREGTYPE; + if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { + tty_warn(1,"File is too large for tar %s", + arcn->org_name); + return 1; + } + arcn->pad = TAR_PAD(arcn->sb.st_size); + } + + /* + * copy those fields that are independent of the type + */ + mtime = tst.st_ino ? tst.st_mtime : arcn->sb.st_mtime; + if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || + u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || + u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || + u32_oct(mtime, hd->mtime, sizeof(hd->mtime), 1)) + goto out; + + /* + * calculate and add the checksum, then write the header. 
A return of + * 0 tells the caller to now write the file data, 1 says no data needs + * to be written + */ + if (u32_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; /* XXX Something's wrong here + * because a zero-byte file can + * cause this to be done and + * yet the resulting warning + * seems incorrect */ + + if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) + return -1; + if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) + return -1; + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return 0; + return 1; + + out: + /* + * header field is out of range + */ + tty_warn(1, "Tar header field is too small for %s", arcn->org_name); + return 1; +} + +/* + * Routines for POSIX ustar + */ + +/* + * ustar_strd() + * initialization for ustar read + * Return: + * 0 if ok, -1 otherwise + */ + +int +ustar_strd(void) +{ + return 0; +} + +/* + * ustar_stwr() + * initialization for ustar write + * Return: + * 0 if ok, -1 otherwise + */ + +int +ustar_stwr(void) +{ + return 0; +} + +/* + * ustar_id() + * determine if a block given to us is a valid ustar header. We have to + * be on the lookout for those pesky blocks of all zero's + * Return: + * 0 if a ustar header, -1 otherwise + */ + +int +ustar_id(char *blk, int size) +{ + HD_USTAR *hd; + + if (size < BLKMULT) + return -1; + hd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test then check + * ustar magic cookie. We should use TMAGLEN, but some USTAR archive + * programs are fouled up and create archives missing the \0. Last we + * check the checksum. If ok we have to assume it is a valid header. 
+ */ + if (hd->name[0] == '\0') + return -1; + if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) + return -1; + /* This is GNU tar */ + if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && + !seen_gnu_warning) { + seen_gnu_warning = 1; + tty_warn(0, + "Trying to read GNU tar archive with GNU extensions and end-of-volume checks off"); + } + return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); +} + +/* + * ustar_rd() + * extract the values out of block already determined to be a ustar header. + * store the values in the ARCHD parameter. + * Return: + * 0 + */ + +int +ustar_rd(ARCHD *arcn, char *buf) +{ + HD_USTAR *hd; + char *dest; + int cnt; + dev_t devmajor; + dev_t devminor; + + /* + * we only get proper sized buffers + */ + if (ustar_id(buf, BLKMULT) < 0) + return -1; + + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->pat = NULL; + arcn->sb.st_nlink = 1; + hd = (HD_USTAR *)buf; + + /* + * see if the filename is split into two parts. if, so joint the parts. + * we copy the prefix first and add a / between the prefix and name. + */ + dest = arcn->name; + if (*(hd->prefix) != '\0') { + cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); + dest += cnt; + *dest++ = '/'; + cnt++; + } else { + cnt = 0; + } + + if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { + arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, + &gnu_name_string, &gnu_name_length, hd->name, + sizeof(hd->name)) + cnt; + arcn->ln_nlen = expandname(arcn->ln_name, + sizeof(arcn->ln_name), &gnu_link_string, &gnu_link_length, + hd->linkname, sizeof(hd->linkname)); + } + + /* + * follow the spec to the letter. we should only have mode bits, strip + * off all other crud we may be passed. 
+ */ + arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode, sizeof(hd->mode), OCT) & + 0xfff); + arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); + if (arcn->sb.st_size == -1) + return -1; + arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * If we can find the ascii names for gname and uname in the password + * and group files we will use the uid's and gid they bind. Otherwise + * we use the uid and gid values stored in the header. (This is what + * the posix spec wants). + */ + hd->gname[sizeof(hd->gname) - 1] = '\0'; + if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) + arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); + hd->uname[sizeof(hd->uname) - 1] = '\0'; + if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) + arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); + + /* + * set the defaults, these may be changed depending on the file type + */ + arcn->pad = 0; + arcn->skip = 0; + arcn->sb.st_rdev = (dev_t)0; + + /* + * set the mode and PAX type according to the typeflag in the header + */ + switch(hd->typeflag) { + case FIFOTYPE: + arcn->type = PAX_FIF; + arcn->sb.st_mode |= S_IFIFO; + break; + case DIRTYPE: + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + + /* + * Some programs that create ustar archives append a '/' + * to the pathname for directories. This clearly violates + * ustar specs, but we will silently strip it off anyway. + */ + if (arcn->name[arcn->nlen - 1] == '/') + arcn->name[--arcn->nlen] = '\0'; + break; + case BLKTYPE: + case CHRTYPE: + /* + * this type requires the rdev field to be set. 
+ */ + if (hd->typeflag == BLKTYPE) { + arcn->type = PAX_BLK; + arcn->sb.st_mode |= S_IFBLK; + } else { + arcn->type = PAX_CHR; + arcn->sb.st_mode |= S_IFCHR; + } + devmajor = (dev_t)asc_u32(hd->devmajor,sizeof(hd->devmajor),OCT); + devminor = (dev_t)asc_u32(hd->devminor,sizeof(hd->devminor),OCT); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + break; + case SYMTYPE: + case LNKTYPE: + if (hd->typeflag == SYMTYPE) { + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + } else { + arcn->type = PAX_HLK; + /* + * so printing looks better + */ + arcn->sb.st_mode |= S_IFREG; + arcn->sb.st_nlink = 2; + } + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + if (is_gnutar) { + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + if (hd->typeflag != LONGLINKTYPE) + arcn->type = PAX_GLF; + else + arcn->type = PAX_GLL; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } else { + tty_warn(1, "GNU Long %s found in posix ustar archive.", + hd->typeflag == LONGLINKTYPE ? "Link" : "File"); + } + break; + case FILEXTYPE: + case GLOBXTYPE: + tty_warn(0, "%s extended headers posix ustar archive." + " Extracting as plain files. Following files might be" + " in the wrong directory or have wrong attributes.", + hd->typeflag == FILEXTYPE ? "File" : "Global"); + /*FALLTHROUGH*/ + case CONTTYPE: + case AREGTYPE: + case REGTYPE: + default: + /* + * these types have file data that follows. Set the skip and + * pad fields. 
+ */ + arcn->type = PAX_REG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + arcn->sb.st_mode |= S_IFREG; + break; + } + return 0; +} + +static int +expandname(char *buf, size_t len, char **gnu_name, size_t *gnu_length, + const char *name, size_t nlen) +{ + if (*gnu_name) { + len = strlcpy(buf, *gnu_name, len); + free(*gnu_name); + *gnu_name = NULL; + *gnu_length = 0; + } else { + if (len > ++nlen) + len = nlen; + len = strlcpy(buf, name, len); + } + return len; +} + +static void +longlink(ARCHD *arcn, int type) +{ + ARCHD larc; + + (void)memset(&larc, 0, sizeof(larc)); + + larc.type = type; + larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name)); + + switch (type) { + case PAX_GLL: + gnu_hack_string = arcn->ln_name; + gnu_hack_len = arcn->ln_nlen + 1; + break; + case PAX_GLF: + gnu_hack_string = arcn->name; + gnu_hack_len = arcn->nlen + 1; + break; + default: + errx(1, "Invalid type in GNU longlink %d", type); + } + + /* + * We need a longlink now. + */ + ustar_wr(&larc); +} + +/* + * ustar_wr() + * write a ustar header for the file specified in the ARCHD to the archive + * Have to check for file types that cannot be stored and file names that + * are too long. 
Be careful of the term (last arg) to u32_oct, we only use + * '\0' for the termination character (this is different than picky tar) + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +static int +size_err(const char *what, ARCHD *arcn) +{ + /* + * header field is out of range + */ + tty_warn(1, "Ustar %s header field is too small for %s", + what, arcn->org_name); + return 1; +} + +int +ustar_wr(ARCHD *arcn) +{ + HD_USTAR *hd; + char *pt; + uintmax_t mtime; + char hdblk[sizeof(HD_USTAR)]; + const char *user, *group; + + switch (arcn->type) { + case PAX_SCK: + /* + * check for those file system types ustar cannot store + */ + if (!is_gnutar) + tty_warn(1, "Ustar cannot archive a socket %s", + arcn->org_name); + return 1; + + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + /* + * check the length of the linkname + */ + if (arcn->ln_nlen >= (int)sizeof(hd->linkname)) { + if (is_gnutar) { + longlink(arcn, PAX_GLL); + } else { + tty_warn(1, "Link name too long for ustar %s", + arcn->ln_name); + return 1; + } + } + break; + default: + break; + } + + /* + * split the path name into prefix and name fields (if needed). 
if + * pt != arcn->name, the name has to be split + */ + if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { + if (is_gnutar) { + longlink(arcn, PAX_GLF); + pt = arcn->name; + } else { + tty_warn(1, "File name too long for ustar %s", + arcn->name); + return 1; + } + } + + /* + * zero out the header so we don't have to worry about zero fill below + */ + memset(hdblk, 0, sizeof(hdblk)); + hd = (HD_USTAR *)hdblk; + arcn->pad = 0L; + + /* + * split the name, or zero out the prefix + */ + if (pt != arcn->name) { + /* + * name was split, pt points at the / where the split is to + * occur, we remove the / and copy the first part to the prefix + */ + *pt = '\0'; + strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); + *pt++ = '/'; + } + + /* + * copy the name part. this may be the whole path or the part after + * the prefix + */ + strlcpy(hd->name, pt, sizeof(hd->name)); + + /* + * set the fields in the header that are type dependent + */ + switch(arcn->type) { + case PAX_DIR: + hd->typeflag = DIRTYPE; + if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) + return size_err("DIRTYPE", arcn); + break; + case PAX_CHR: + case PAX_BLK: + if (arcn->type == PAX_CHR) + hd->typeflag = CHRTYPE; + else + hd->typeflag = BLKTYPE; + if (u32_oct((uintmax_t)MAJOR(arcn->sb.st_rdev), hd->devmajor, + sizeof(hd->devmajor), 3) || + u32_oct((uintmax_t)MINOR(arcn->sb.st_rdev), hd->devminor, + sizeof(hd->devminor), 3) || + u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) + return size_err("DEVTYPE", arcn); + break; + case PAX_FIF: + hd->typeflag = FIFOTYPE; + if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) + return size_err("FIFOTYPE", arcn); + break; + case PAX_GLL: + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->type == PAX_SLK) + hd->typeflag = SYMTYPE; + else if (arcn->type == PAX_GLL) + hd->typeflag = LONGLINKTYPE; + else + hd->typeflag = LNKTYPE; + strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); + if (u32_oct((uintmax_t)gnu_hack_len, 
hd->size, + sizeof(hd->size), 3)) + return size_err("LINKTYPE", arcn); + break; + case PAX_GLF: + case PAX_REG: + case PAX_CTG: + default: + /* + * file data with this type, set the padding + */ + if (arcn->type == PAX_GLF) { + hd->typeflag = LONGNAMETYPE; + arcn->pad = TAR_PAD(gnu_hack_len); + if (OFFT_OCT((uint32_t)gnu_hack_len, hd->size, + sizeof(hd->size), 3)) { + tty_warn(1,"File is too long for ustar %s", + arcn->org_name); + return 1; + } + } else { + if (arcn->type == PAX_CTG) + hd->typeflag = CONTTYPE; + else + hd->typeflag = REGTYPE; + arcn->pad = TAR_PAD(arcn->sb.st_size); + if (OFFT_OCT(arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) { + tty_warn(1,"File is too long for ustar %s", + arcn->org_name); + return 1; + } + } + break; + } + + strncpy(hd->magic, TMAGIC, TMAGLEN); + if (is_gnutar) + hd->magic[TMAGLEN - 1] = hd->version[0] = ' '; + else + strncpy(hd->version, TVERSION, TVERSLEN); + + /* + * set the remaining fields. Some versions want all 16 bits of mode + * we better humor them (they really do not meet spec though).... + */ + if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) + return size_err("MODE", arcn); + if (u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) + return size_err("UID", arcn); + if (u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) + return size_err("GID", arcn); + mtime = tst.st_ino ? tst.st_mtime : arcn->sb.st_mtime; + if (u32_oct(mtime, hd->mtime, sizeof(hd->mtime), 3)) + return size_err("MTIME", arcn); + user = user_from_uid(arcn->sb.st_uid, 1); + group = group_from_gid(arcn->sb.st_gid, 1); + strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); + strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); + + /* + * calculate and store the checksum write the header to the archive + * return 0 tells the caller to now write the file data, 1 says no data + * needs to be written + */ + if (u32_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, + sizeof(hd->chksum), 3)) + return size_err("CHKSUM", arcn); + if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) + return -1; + if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) + return -1; + if (gnu_hack_string) { + int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); + int pad = gnu_hack_len; + gnu_hack_string = NULL; + gnu_hack_len = 0; + if (res < 0) + return -1; + if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) + return -1; + } + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return 0; + return 1; +} + +/* + * name_split() + * see if the name has to be split for storage in a ustar header. We try + * to fit the entire name in the name field without splitting if we can. + * The split point is always at a / + * Return + * character pointer to split point (always the / that is to be removed + * if the split is not needed, the points is set to the start of the file + * name (it would violate the spec to split there). A NULL is returned if + * the file name is too long + */ + +static char * +name_split(char *name, int len) +{ + char *start; + + /* + * check to see if the file name is small enough to fit in the name + * field. if so just return a pointer to the name. + */ + if (len < TNMSZ) + return name; + /* + * GNU tar does not honor the prefix+name mode if the magic + * is not "ustar\0". So in GNU tar compatibility mode, we don't + * split the filename into prefix+name because we are setting + * the magic to "ustar " as GNU tar does. This of course will + * end up creating a LongLink record in cases where it does not + * really need do, but we are behaving like GNU tar after all. 
+ */ + if (is_gnutar || len > (TPFSZ + TNMSZ)) + return NULL; + + /* + * we start looking at the biggest sized piece that fits in the name + * field. We walk forward looking for a slash to split at. The idea is + * to find the biggest piece to fit in the name field (or the smallest + * prefix we can find) (the -1 is correct the biggest piece would + * include the slash between the two parts that gets thrown away) + */ + start = name + len - TNMSZ; + while ((*start != '\0') && (*start != '/')) + ++start; + + /* + * if we hit the end of the string, this name cannot be split, so we + * cannot store this file. + */ + if (*start == '\0') + return NULL; + len = start - name; + + /* + * NOTE: /str where the length of str == TNMSZ cannot be stored under + * the p1003.1-1990 spec for ustar. We could force a prefix of / and + * the file would then expand on extract to //str. The len == 0 below + * makes this special case follow the spec to the letter. + */ + if ((len >= TPFSZ) || (len == 0)) + return NULL; + + /* + * ok have a split point, return it to the caller + */ + return start; +} + +/* + * convert a glob into a RE, and add it to the list. 
we convert to + * four different RE's (because we're using BRE's and can't use | + * alternation :-() with this padding: + * .*\/ and $ + * .*\/ and \/.* + * ^ and $ + * ^ and \/.* + */ +static int +tar_gnutar_exclude_one(const char *line, size_t len) +{ + /* 2 * buffer len + nul */ + char sbuf[MAXPATHLEN * 2 + 1]; + /* + / + // + .*""/\/ + \/.* */ + char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; + size_t i; + int j = 0; + + if (line[len - 1] == '\n') + len--; + for (i = 0; i < len; i++) { + /* + * convert glob to regexp, escaping everything + */ + if (line[i] == '*') + sbuf[j++] = '.'; + else if (line[i] == '?') { + sbuf[j++] = '.'; + continue; + } else if (!isalnum((unsigned char)line[i]) && + !isblank((unsigned char)line[i])) + sbuf[j++] = '\\'; + sbuf[j++] = line[i]; + } + sbuf[j] = '\0'; + /* don't need the .*\/ ones if we start with /, i guess */ + if (line[0] != '/') { + (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); + if (rep_add(rabuf) < 0) + return (-1); + (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); + if (rep_add(rabuf) < 0) + return (-1); + } + + (void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); + if (rep_add(rabuf) < 0) + return (-1); + (void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); + if (rep_add(rabuf) < 0) + return (-1); + + return (0); +} + +/* + * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, + * we go through each line of the file, building a string from the "glob" + * lines in the file into RE lines, of the form `/^RE$//', which we pass + * to rep_add(), which will add a empty replacement (exclusion), for the + * named files. 
+ */ +int +tar_gnutar_minus_minus_exclude(const char *path) +{ + size_t len = strlen(path); + + if (len > MAXPATHLEN) + tty_warn(0, "pathname too long: %s", path); + + return (tar_gnutar_exclude_one(path, len)); +} + +int +tar_gnutar_X_compat(const char *path) +{ + char *line; + FILE *fp; + int lineno = 0; + size_t len; + + if (path[0] == '-' && path[1] == '\0') + fp = stdin; + else { + fp = fopen(path, "r"); + if (fp == NULL) { + tty_warn(1, "cannot open %s: %s", path, + strerror(errno)); + return -1; + } + } + + while ((line = fgetln(fp, &len))) { + lineno++; + if (len > MAXPATHLEN) { + tty_warn(0, "pathname too long, line %d of %s", + lineno, path); + } + if (tar_gnutar_exclude_one(line, len)) + return -1; + } + if (fp != stdin) + fclose(fp); + return 0; +} diff --git a/bin/pax/tar.h b/bin/pax/tar.h new file mode 100644 index 0000000..ae7f6ce --- /dev/null +++ b/bin/pax/tar.h @@ -0,0 +1,154 @@ +/* $NetBSD: tar.h,v 1.10 2013/01/24 17:43:44 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tar.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * defines and data structures common to all tar formats + */ +#define CHK_LEN 8 /* length of checksum field */ +#define TNMSZ 100 /* size of name field */ +#ifdef _PAX_ +#define NULLCNT 2 /* number of null blocks in trailer */ +#define CHK_OFFSET 148 /* start of chksum field */ +#define BLNKSUM 256L /* sum of checksum field using ' ' */ +#endif /* _PAX_ */ + +/* + * Values used in typeflag field in all tar formats + * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers) + */ +#define REGTYPE '0' /* Regular File */ +#define AREGTYPE '\0' /* Regular File */ +#define LNKTYPE '1' /* Link */ +#define SYMTYPE '2' /* Symlink */ +#define CHRTYPE '3' /* Character Special File */ +#define BLKTYPE '4' /* Block Special File */ +#define DIRTYPE '5' /* Directory */ +#define FIFOTYPE '6' /* FIFO */ +#define CONTTYPE '7' /* high perf file */ +#define GLOBXTYPE 'g' /* global extended header */ +#define FILEXTYPE 'x' /* file extended header */ + +/* + * GNU tar compatibility; + */ +#define LONGLINKTYPE 'K' /* Long link name (hard or symbolic link) */ +#define LONGNAMETYPE 'L' /* Long file name */ + +/* + * Mode field encoding of the different file types - values in octal +
*/ +#define TSUID 04000 /* Set UID on execution */ +#define TSGID 02000 /* Set GID on execution */ +#define TSVTX 01000 /* Reserved */ +#define TUREAD 00400 /* Read by owner */ +#define TUWRITE 00200 /* Write by owner */ +#define TUEXEC 00100 /* Execute/Search by owner */ +#define TGREAD 00040 /* Read by group */ +#define TGWRITE 00020 /* Write by group */ +#define TGEXEC 00010 /* Execute/Search by group */ +#define TOREAD 00004 /* Read by other */ +#define TOWRITE 00002 /* Write by other */ +#define TOEXEC 00001 /* Execute/Search by other */ + +#ifdef _PAX_ +/* + * Pad with a bit mask, much faster than doing a mod but only works on powers + * of 2. Macro below is for block of 512 bytes. + */ +#define TAR_PAD(x) ((512 - ((x) & 511)) & 511) +#endif /* _PAX_ */ + +/* + * structure of an old tar header as it appeared in BSD releases + */ +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char linkflag; /* norm, hard, or sym.
*/ + char linkname[TNMSZ]; /* linked to name */ +} HD_TAR; + +#ifdef _PAX_ +/* + * -o options for BSD tar to not write directories to the archive + */ +#define TAR_NODIR "nodir" +#define TAR_OPTION "write_opt" + +/* + * default device names + */ +extern char DEV_0[]; +extern char DEV_1[]; +extern char DEV_4[]; +extern char DEV_5[]; +extern char DEV_7[]; +extern char DEV_8[]; +#endif /* _PAX_ */ + +/* + * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990 + */ +#define TPFSZ 155 +#define TMAGIC "ustar" /* ustar and a null */ +#define TMAGLEN 6 +#define TVERSION "00" /* 00 and no null */ +#define TVERSLEN 2 + +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char typeflag; /* type of file. */ + char linkname[TNMSZ]; /* linked to name */ + char magic[TMAGLEN]; /* magic cookie */ + char version[TVERSLEN]; /* version */ + char uname[32]; /* ascii owner name */ + char gname[32]; /* ascii group name */ + char devmajor[8]; /* major device number */ + char devminor[8]; /* minor device number */ + char prefix[TPFSZ]; /* leading part of a split name, joined to name with a / */ +} HD_USTAR; diff --git a/bin/pax/tty_subs.c b/bin/pax/tty_subs.c new file mode 100644 index 0000000..5956877 --- /dev/null +++ b/bin/pax/tty_subs.c @@ -0,0 +1,200 @@ +/* $NetBSD: tty_subs.c,v 1.19 2007/04/23 18:40:22 christos Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +#if !defined(lint) +#if 0 +static char sccsid[] = "@(#)tty_subs.c 8.2 (Berkeley) 4/18/94"; +#else +__RCSID("$NetBSD: tty_subs.c,v 1.19 2007/04/23 18:40:22 christos Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +#include + +/* + * routines that deal with I/O to and from the user + */ + +#define DEVTTY "/dev/tty" /* device for interactive i/o */ +static FILE *ttyoutf = NULL; /* output pointing at control tty */ +static FILE *ttyinf = NULL; /* input pointing at control tty */ + +/* + * tty_init() + * Try to open the controlling terminal (if any) for this process. If the + * open fails, future ops that require user input will get an EOF. + */ + +int +tty_init(void) +{ + int ttyfd; + + if ((ttyfd = open(DEVTTY, O_RDWR)) >= 0) { + if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) { + if ((ttyinf = fdopen(ttyfd, "r")) != NULL) + return 0; + (void)fclose(ttyoutf); + } + (void)close(ttyfd); + } + + if (iflag) { + tty_warn(1, "Fatal error, cannot open %s", DEVTTY); + return -1; + } + return 0; +} + +/* + * tty_prnt() + * print a message using the specified format to the controlling tty + * if there is no controlling terminal, just return. + */ + +void +tty_prnt(const char *fmt, ...) +{ + va_list ap; + if (ttyoutf == NULL) + return; + va_start(ap, fmt); + (void)vfprintf(ttyoutf, fmt, ap); + va_end(ap); + (void)fflush(ttyoutf); +} + +/* + * tty_read() + * read a string from the controlling terminal if it is open into the + * supplied buffer + * Return: + * 0 if data was read, -1 otherwise. 
+ */ + +int +tty_read(char *str, int len) +{ + char *pt; + + if ((--len <= 0) || (ttyinf == NULL) || (fgets(str,len,ttyinf) == NULL)) + return -1; + *(str + len) = '\0'; + + /* + * strip off that trailing newline + */ + if ((pt = strchr(str, '\n')) != NULL) + *pt = '\0'; + return 0; +} + +/* + * tty_warn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +tty_warn(int set, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if ((Vflag || vflag) && vfpart) { + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fputc('\n', stderr); +} + +/* + * syswarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +syswarn(int set, int errnum, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if ((Vflag || vflag) && vfpart) { + (void)fputc('\n', stdout); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + + /* + * format and print the errno + */ + if (errnum > 0) + (void)fprintf(stderr, " (%s)", strerror(errnum)); + (void)fputc('\n', stderr); +} -- cgit v1.2.3-60-g2f50