From e5795b70c403b1673f54d12621b8e670f5a7377d Mon Sep 17 00:00:00 2001 From: Kiyoshi Aman Date: Tue, 28 May 2019 19:21:23 -0500 Subject: usr.bin/compress: remove, as it is handled by ncompress --- usr.bin/compress/compress.1 | 171 ---------- usr.bin/compress/compress.c | 459 ------------------------- usr.bin/compress/doc/NOTES | 139 -------- usr.bin/compress/doc/README | 283 --------------- usr.bin/compress/doc/revision.log | 116 ------- usr.bin/compress/zopen.3 | 138 -------- usr.bin/compress/zopen.c | 699 -------------------------------------- 7 files changed, 2005 deletions(-) delete mode 100644 usr.bin/compress/compress.1 delete mode 100644 usr.bin/compress/compress.c delete mode 100644 usr.bin/compress/doc/NOTES delete mode 100644 usr.bin/compress/doc/README delete mode 100644 usr.bin/compress/doc/revision.log delete mode 100644 usr.bin/compress/zopen.3 delete mode 100644 usr.bin/compress/zopen.c (limited to 'usr.bin') diff --git a/usr.bin/compress/compress.1 b/usr.bin/compress/compress.1 deleted file mode 100644 index 84bc47c..0000000 --- a/usr.bin/compress/compress.1 +++ /dev/null @@ -1,171 +0,0 @@ -.\" $NetBSD: compress.1,v 1.16 2017/07/04 06:58:55 wiz Exp $ -.\" -.\" Copyright (c) 1986, 1990, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" This code is derived from software contributed to Berkeley by -.\" James A. Woods, derived from original work by Spencer Thomas -.\" and Joseph Orost. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" @(#)compress.1 8.2 (Berkeley) 4/18/94 -.\" -.Dd January 23, 2003 -.Dt COMPRESS 1 -.Os -.Sh NAME -.Nm compress , -.\".Nm uncompress , -.Nm uncompress -.\".Nm zcat -.Nd compress and expand data -.Sh SYNOPSIS -.Nm -.Op Fl cdfv -.Op Fl b Ar bits -.Op Ar -.Nm uncompress -.Op Fl cdfv -.Op Ar -.\".Nm zcat -.\".Op Ar -.Sh DESCRIPTION -.Nm -reduces the size of the named files using adaptive Lempel-Ziv coding. -Each -.Ar file -is renamed to the same name plus the extension -.Dq .Z . -As many of the modification time, access time, file flags, file mode, -user ID, and group ID as allowed by permissions are retained in the -new file. -If compression would not reduce the size of a -.Ar file , -the file is ignored. -.Pp -.Nm uncompress -restores the compressed files to their original form, renaming the -files by deleting the -.Dq .Z -extension. -.\".Pp -.\".Nm Zcat -.\"is an alias for -.\".Dq "uncompress -c" . -.Pp -If renaming the files would cause files to be overwritten and the standard -input device is a terminal, the user is prompted (on the standard error -output) for confirmation. -If prompting is not possible or confirmation is not received, the files -are not overwritten. -.Pp -If no files are specified, the standard input is compressed or uncompressed -to the standard output. -If either the input and output files are not regular files, the checks for -reduction in size and file overwriting are not performed, the input file is -not removed, and the attributes of the input file are not retained. -.Pp -The options are as follows: -.Bl -tag -width Ds -.It Fl b -Specify the -.Ar bits -code limit (see below). -.It Fl c -Compressed or uncompressed output is written to the standard output. -No files are modified. -.It Fl d -Force decompression. -.It Fl f -Force compression of -.Ar file , -even if it is not actually reduced in size. -Additionally, files are overwritten without prompting for confirmation. -.It Fl v -Print the percentage reduction of each file. -.El -.Pp -.Nm -uses a modified Lempel-Ziv algorithm. -Common substrings in the file are first replaced by 9-bit codes 257 and up. -When code 512 is reached, the algorithm switches to 10-bit codes and -continues to use more bits until the -limit specified by the -.Fl b -flag is reached (the default is 16). -.Ar Bits -must be between 9 and 16. -.Pp -After the -.Ar bits -limit is reached, -.Nm -periodically checks the compression ratio. -If it is increasing, -.Nm -continues to use the existing code dictionary. -However, if the compression ratio decreases, -.Nm -discards the table of substrings and rebuilds it from scratch. -This allows the algorithm to adapt to the next "block" of the file. -.Pp -The -.Fl b -flag is omitted for -.Ar uncompress -since the -.Ar bits -parameter specified during compression -is encoded within the output, along with -a magic number to ensure that neither decompression of random data nor -recompression of compressed data is attempted. -.Pp -The amount of compression obtained depends on the size of the -input, the number of -.Ar bits -per code, and the distribution of common substrings. -Typically, text such as source code or English is reduced by 50\-60%. -Compression is generally much better than that achieved by Huffman -coding (as used in the historical command pack), or adaptive Huffman -coding (as used in the historical command compact), and takes less -time to compute. -.Sh EXIT STATUS -.Ex -std compress -.Sh SEE ALSO -.Xr zcat 1 -.Rs -.%A Welch, Terry A. -.%D June, 1984 -.%T "A Technique for High Performance Data Compression" -.%J "IEEE Computer" -.%V 17:6 -.%P pp. 8-19 -.Re -.Sh HISTORY -The -.Nm -command appeared in -.Bx 4.3 . diff --git a/usr.bin/compress/compress.c b/usr.bin/compress/compress.c deleted file mode 100644 index 93fdc76..0000000 --- a/usr.bin/compress/compress.c +++ /dev/null @@ -1,459 +0,0 @@ -/* $NetBSD: compress.c,v 1.26 2011/08/30 23:08:05 joerg Exp $ */ - -/*- - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#ifndef lint -__COPYRIGHT("@(#) Copyright (c) 1992, 1993\ - The Regents of the University of California. All rights reserved."); -#endif /* not lint */ - -#ifndef lint -#if 0 -static char sccsid[] = "@(#)compress.c 8.2 (Berkeley) 1/7/94"; -#else -__RCSID("$NetBSD: compress.c,v 1.26 2011/08/30 23:08:05 joerg Exp $"); -#endif -#endif /* not lint */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static void compress(const char *, const char *, int); -static void cwarn(const char *, ...) __printflike(1, 2); -static void cwarnx(const char *, ...) __printflike(1, 2); -static void decompress(const char *, const char *, int); -static int permission(const char *); -static void setfile(const char *, struct stat *); -__dead static void usage(int); - -extern FILE *zopen(const char *fname, const char *mode, int bits); - -static int eval, force, verbose; -static int isstdout, isstdin; - -int -main(int argc, char **argv) -{ - enum {COMPRESS, DECOMPRESS} style = COMPRESS; - size_t len; - int bits, cat, ch; - char *p, newname[MAXPATHLEN]; - - if ((p = strrchr(argv[0], '/')) == NULL) - p = argv[0]; - else - ++p; - if (!strcmp(p, "uncompress")) - style = DECOMPRESS; - else if (!strcmp(p, "compress")) - style = COMPRESS; - else if (!strcmp(p, "zcat")) { - style = DECOMPRESS; - cat = 1; - } - else - errx(1, "unknown program name"); - - bits = cat = 0; - while ((ch = getopt(argc, argv, "b:cdfv")) != -1) - switch(ch) { - case 'b': - bits = strtol(optarg, &p, 10); - if (*p) - errx(1, "illegal bit count -- %s", optarg); - break; - case 'c': - cat = 1; - break; - case 'd': /* Backward compatible. */ - style = DECOMPRESS; - break; - case 'f': - force = 1; - break; - case 'v': - verbose = 1; - break; - case '?': - default: - usage(style == COMPRESS); - } - argc -= optind; - argv += optind; - - if (argc == 0) { - switch(style) { - case COMPRESS: - isstdout = 1; - isstdin = 1; - (void)compress("/dev/stdin", "/dev/stdout", bits); - break; - case DECOMPRESS: - isstdout = 1; - isstdin = 1; - (void)decompress("/dev/stdin", "/dev/stdout", bits); - break; - } - exit (eval); - } - - if (cat == 1 && argc > 1) - errx(1, "the -c option permits only a single file argument"); - - for (; *argv; ++argv) { - isstdout = 0; - switch(style) { - case COMPRESS: - if (cat) { - isstdout = 1; - compress(*argv, "/dev/stdout", bits); - break; - } - if ((p = strrchr(*argv, '.')) != NULL && - !strcmp(p, ".Z")) { - cwarnx("%s: name already has trailing .Z", - *argv); - break; - } - len = strlen(*argv); - if (len > sizeof(newname) - 3) { - cwarnx("%s: name too long", *argv); - break; - } - memmove(newname, *argv, len); - newname[len] = '.'; - newname[len + 1] = 'Z'; - newname[len + 2] = '\0'; - compress(*argv, newname, bits); - break; - case DECOMPRESS: - len = strlen(*argv); - if ((p = strrchr(*argv, '.')) == NULL || - strcmp(p, ".Z")) { - if (len > sizeof(newname) - 3) { - cwarnx("%s: name too long", *argv); - break; - } - memmove(newname, *argv, len); - newname[len] = '.'; - newname[len + 1] = 'Z'; - newname[len + 2] = '\0'; - decompress(newname, - cat ? "/dev/stdout" : *argv, bits); - if (cat) - isstdout = 1; - } else { - if (len - 2 > sizeof(newname) - 1) { - cwarnx("%s: name too long", *argv); - break; - } - memmove(newname, *argv, len - 2); - newname[len - 2] = '\0'; - decompress(*argv, - cat ? "/dev/stdout" : newname, bits); - if (cat) - isstdout = 1; - } - break; - } - } - exit (eval); -} - -static void -compress(const char *in, const char *out, int bits) -{ - size_t nr; - struct stat isb, sb; - const char *error = NULL; - FILE *ifp, *ofp; - int exists, isreg, oreg; - u_char buf[BUFSIZ]; - - if (!isstdout) { - exists = !stat(out, &sb); - if (!force && exists && S_ISREG(sb.st_mode) && !permission(out)) - return; - oreg = !exists || S_ISREG(sb.st_mode); - } else - oreg = 0; - - ifp = ofp = NULL; - if ((ifp = fopen(in, "r")) == NULL) { - cwarn("%s", in); - return; - } - - if (!isstdin) { - if (stat(in, &isb)) { /* DON'T FSTAT! */ - cwarn("%s", in); - goto err; - } - if (!S_ISREG(isb.st_mode)) - isreg = 0; - else - isreg = 1; - } else - isreg = 0; - - if ((ofp = zopen(out, "w", bits)) == NULL) { - cwarn("%s", out); - goto err; - } - oreg <<= 1; - while ((nr = fread(buf, 1, sizeof(buf), ifp)) != 0) - if (fwrite(buf, 1, nr, ofp) != nr) { - cwarn("%s", out); - goto err; - } - - if (ferror(ifp)) - error = in; - if (fclose(ifp)) - if (error == NULL) - error = in; - if (fclose(ofp)) - if (error == NULL) - error = out; - ifp = NULL; - ofp = NULL; - if (error) { - cwarn("%s", error); - goto err; - } - - if (isreg && oreg) { - if (stat(out, &sb)) { - cwarn("%s", out); - goto err; - } - - if (!force && sb.st_size >= isb.st_size) { - if (verbose) - (void)printf("%s: file would grow; left unmodified\n", in); - goto err; - } - - setfile(out, &isb); - - if (unlink(in)) - cwarn("%s", in); - - if (verbose) { - (void)printf("%s: ", out); - if (isb.st_size > sb.st_size) - (void)printf("%.0f%% compression\n", - ((double)sb.st_size / isb.st_size) * 100.0); - else - (void)printf("%.0f%% expansion\n", - ((double)isb.st_size / sb.st_size) * 100.0); - } - } - return; - -err: if (ofp) - (void)fclose(ofp); - if (oreg == 2) - (void)unlink(out); - if (ifp) - (void)fclose(ifp); -} - -static void -decompress(const char *in, const char *out, int bits) -{ - size_t nr; - struct stat sb; - FILE *ifp, *ofp; - int exists, isreg, oreg; - u_char buf[BUFSIZ]; - - if (!isstdout) { - exists = !stat(out, &sb); - if (!force && exists && S_ISREG(sb.st_mode) && !permission(out)) - return; - oreg = !exists || S_ISREG(sb.st_mode); - } else - oreg = 0; - - ifp = ofp = NULL; - if ((ofp = fopen(out, "w")) == NULL) { - cwarn("%s", out); - return; - } - - if ((ifp = zopen(in, "r", bits)) == NULL) { - cwarn("%s", in); - goto err; - } - if (!isstdin) { - if (stat(in, &sb)) { - cwarn("%s", in); - goto err; - } - if (!S_ISREG(sb.st_mode)) - isreg = 0; - else - isreg = 1; - } else - isreg = 0; - - oreg <<= 1; - while ((nr = fread(buf, 1, sizeof(buf), ifp)) != 0) - if (fwrite(buf, 1, nr, ofp) != nr) { - cwarn("%s", out); - goto err; - } - - if (ferror(ifp)) { - cwarn("%s", in); - goto err; - } - if (fclose(ifp)) { - ifp = NULL; - cwarn("%s", in); - goto err; - } - ifp = NULL; - - if (fclose(ofp)) { - ofp = NULL; - cwarn("%s", out); - goto err; - } - - if (isreg && oreg) { - setfile(out, &sb); - - if (unlink(in)) - cwarn("%s", in); - } - return; - -err: if (ofp) - (void)fclose(ofp); - if (oreg == 2) - (void)unlink(out); - if (ifp) - (void)fclose(ifp); -} - -static void -setfile(const char *name, struct stat *fs) -{ - static struct timeval tv[2]; - - fs->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO; - - TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec); - TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec); - if (utimes(name, tv)) - cwarn("utimes: %s", name); - - /* - * Changing the ownership probably won't succeed, unless we're root - * or POSIX_CHOWN_RESTRICTED is not set. Set uid/gid before setting - * the mode; current BSD behavior is to remove all setuid bits on - * chown. If chown fails, lose setuid/setgid bits. - */ - if (chown(name, fs->st_uid, fs->st_gid)) { - if (errno != EPERM) - cwarn("chown: %s", name); - fs->st_mode &= ~(S_ISUID|S_ISGID); - } - if (chmod(name, fs->st_mode)) - cwarn("chown: %s", name); - - /* - * Restore the file's flags. However, do this only if the original - * file had any flags set; this avoids a warning on file-systems that - * do not support flags. - */ - if (fs->st_flags != 0 && chflags(name, fs->st_flags)) - cwarn("chflags: %s", name); -} - -static int -permission(const char *fname) -{ - int ch, first; - - if (!isatty(fileno(stderr))) - return (0); - (void)fprintf(stderr, "overwrite %s? ", fname); - first = ch = getchar(); - while (ch != '\n' && ch != EOF) - ch = getchar(); - return (first == 'y'); -} - -static void -usage(int iscompress) -{ - if (iscompress) - (void)fprintf(stderr, - "usage: compress [-cdfv] [-b bits] [file ...]\n"); - else - (void)fprintf(stderr, - "usage: uncompress [-cdfv] [-b bits] [file ...]\n"); - exit(1); -} - -static void -cwarnx(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vwarnx(fmt, ap); - va_end(ap); - eval = 1; -} - -static void -cwarn(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vwarn(fmt, ap); - va_end(ap); - eval = 1; -} diff --git a/usr.bin/compress/doc/NOTES b/usr.bin/compress/doc/NOTES deleted file mode 100644 index 26181bf..0000000 --- a/usr.bin/compress/doc/NOTES +++ /dev/null @@ -1,139 +0,0 @@ -From: James A. Woods - ->From vn Fri Dec 2 18:05:27 1988 -Subject: Re: Looking for C source for RSA -Newsgroups: sci.crypt - -# Illegitimi noncarborundum - -Patents are a tar pit. - -A good case can be made that most are just a license to sue, and nothing -is illegal until a patent is upheld in court. - -For example, if you receive netnews by means other than 'nntp', -these very words are being modulated by 'compress', -a variation on the patented Lempel-Ziv-Welch algorithm. - -Original Ziv-Lempel is patent number 4,464,650, and the more powerful -LZW method is #4,558,302. Yet despite any similarities between 'compress' -and LZW (the public-domain 'compress' code was designed and given to the -world before the ink on the Welch patent was dry), no attorneys from Sperry -(the assignee) have asked you to unplug your Usenet connection. - -Why? I can't speak for them, but it is possible the claims are too broad, -or, just as bad, not broad enough. ('compress' does things not mentioned -in the Welch patent.) Maybe they realize that they can commercialize -LZW better by selling hardware implementations rather than by licensing -software. Again, the LZW software delineated in the patent is *not* -the same as that of 'compress'. - -At any rate, court-tested software patents are a different animal; -corporate patents in a portfolio are usually traded like baseball cards -to shut out small fry rather than actually be defended before -non-technical juries. Perhaps RSA will undergo this test successfully, -although the grant to "exclude others from making, using, or selling" -the invention would then only apply to the U.S. (witness the -Genentech patent of the TPA molecule in the U.S. but struck down -in Great Britain as too broad.) - -The concept is still exotic for those who learned in school the rule of thumb -that one may patent "apparatus" but not an "idea". -Apparently this all changed in Diamond v. Diehr (1981) when the U. S. Supreme -Court reversed itself. - -Scholars should consult the excellent article in the Washington and Lee -Law Review (fall 1984, vol. 41, no. 4) by Anthony and Colwell for a -comprehensive survey of an area which will remain murky for some time. - -Until the dust clears, how you approach ideas which are patented depends -on how paranoid you are of a legal onslaught. Arbitrary? Yes. But -the patent bar the CCPA (Court of Customs and Patent Appeals) -thanks you for any uncertainty as they, at least, stand to gain -from any trouble. - -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -From: James A. Woods -Subject: Re: Looking for C source for RSA (actually 'compress' patents) - - In article <2042@eos.UUCP> you write: - >The concept is still exotic for those who learned in school the rule of thumb - >that one may patent "apparatus" but not an "idea". - -A rule of thumb that has never been completely valid, as any chemical -engineer can tell you. (Chemical processes were among the earliest patents, -as I recall.) - - ah yes -- i date myself when relaying out-of-date advice from elderly - attorneys who don't even specialize in patents. one other interesting - class of patents include the output of optical lens design programs, - which yield formulae which can then fairly directly can be molded - into glass. although there are restrictions on patenting equations, - the "embedded systems" seem to fly past the legal gauntlets. - - anyway, i'm still learning about intellectual property law after - several conversations from a unisys (nee sperry) lawyer re 'compress'. - - it's more complicated than this, but they're letting (oral - communication only) software versions of 'compress' slide - as far as licensing fees go. this includes 'arc', 'stuffit', - and other commercial wrappers for 'compress'. yet they are - signing up licensees for hardware chips. hewlett-packard - supposedly has an active vlsi project, and unisys has - board-level lzw-based tape controllers. (to build lzw into - a disk controller would be strange, as you'd have to build - in a filesystem too!) - - it's byzantine - that unisys is in a tiff with hp regarding the patents, - after discovering some sort of "compress" button on some - hp terminal product. why? well, professor abraham lempel jumped - from being department chairman of computer science at technion in - israel to sperry (where he got the first patent), but then to work - at hewlett-packard on sabbatical. the second welch patent - is only weakly derivative of the first, so they want chip - licenses and hp relented. however, everyone agrees something - like the current unix implementation is the way to go with - software, so hp (and ucb) long ago asked spencer thomas and i to sign - off on copyright permission (although they didn't need to, it being pd). - lempel, hp, and unisys grumbles they can't make money off the - software since a good free implementation (not the best -- - i have more ideas!) escaped via usenet. (lempel's own pascal - code was apparently horribly slow.) - i don't follow the ibm 'arc' legal bickering; my impression - is that the pc folks are making money off the archiver/wrapper - look/feel of the thing [if ms-dos can be said to have a look and feel]. - - now where is telebit with the compress firmware? in a limbo - netherworld, probably, with sperry still welcoming outfits - to sign patent licenses, a common tactic to bring other small fry - into the fold. the guy who crammed 12-bit compess into the modem - there left. also what is transpiring with 'compress' and sys 5 rel 4? - beats me, but if sperry got a hold of them on these issues, - at&t would likely re-implement another algorithm if they - thought 'compress' infringes. needful to say, i don't think - it does after the abovementioned legal conversation. - my own beliefs on whether algorithms should be patentable at all - change with the weather. if the courts finally nail down - patent protection for algorithms, academic publication in - textbooks will be somewhat at odds with the engineering world, - where the textbook codes will simply be a big tease to get - money into the patent holder coffers... - - oh, if you implement lzw from the patent, you won't get - good rates because it doesn't mention adaptive table reset, - lack thereof being *the* serious deficiency of thomas' first version. - - now i know that patent law generally protects against independent - re-invention (like the 'xor' hash function pleasantly mentioned - in the patent [but not the paper]). - but the upshot is that if anyone ever wanted to sue us, - we're partially covered with - independently-developed twists, plus the fact that some of us work - in a bureacratic morass (as contractor to a public agency in my case). - - quite a mess, huh? i've wanted to tell someone this stuff - for a long time, for posterity if nothing else. - -james - diff --git a/usr.bin/compress/doc/README b/usr.bin/compress/doc/README deleted file mode 100644 index 6803287..0000000 --- a/usr.bin/compress/doc/README +++ /dev/null @@ -1,283 +0,0 @@ - - @(#)README 8.1 (Berkeley) 6/9/93 - -Compress version 4.0 improvements over 3.0: - o compress() speedup (10-50%) by changing division hash to xor - o decompress() speedup (5-10%) - o Memory requirements reduced (3-30%) - o Stack requirements reduced to less than 4kb - o Removed 'Big+Fast' compress code (FBITS) because of compress speedup - o Portability mods for Z8000 and PC/XT (but not zeus 3.2) - o Default to 'quiet' mode - o Unification of 'force' flags - o Manual page overhaul - o Portability enhancement for M_XENIX - o Removed text on #else and #endif - o Added "-V" switch to print version and options - o Added #defines for SIGNED_COMPARE_SLOW - o Added Makefile and "usermem" program - o Removed all floating point computations - o New programs: [deleted] - -The "usermem" script attempts to determine the maximum process size. Some -editing of the script may be necessary (see the comments). [It should work -fine on 4.3 bsd.] If you can't get it to work at all, just create file -"USERMEM" containing the maximum process size in decimal. - -The following preprocessor symbols control the compilation of "compress.c": - - o USERMEM Maximum process memory on the system - o SACREDMEM Amount to reserve for other proceses - o SIGNED_COMPARE_SLOW Unsigned compare instructions are faster - o NO_UCHAR Don't use "unsigned char" types - o BITS Overrules default set by USERMEM-SACREDMEM - o vax Generate inline assembler - o interdata Defines SIGNED_COMPARE_SLOW - o M_XENIX Makes arrays < 65536 bytes each - o pdp11 BITS=12, NO_UCHAR - o z8000 BITS=12 - o pcxt BITS=12 - o BSD4_2 Allow long filenames ( > 14 characters) & - Call setlinebuf(stderr) - -The difference "usermem-sacredmem" determines the maximum BITS that can be -specified with the "-b" flag. - -memory: at least BITS ------- -- ----- ---- - 433,484 16 - 229,600 15 - 127,536 14 - 73,464 13 - 0 12 - -The default is BITS=16. - -The maximum bits can be overrulled by specifying "-DBITS=bits" at -compilation time. - -WARNING: files compressed on a large machine with more bits than allowed by -a version of compress on a smaller machine cannot be decompressed! Use the -"-b12" flag to generate a file on a large machine that can be uncompressed -on a 16-bit machine. - -The output of compress 4.0 is fully compatible with that of compress 3.0. -In other words, the output of compress 4.0 may be fed into uncompress 3.0 or -the output of compress 3.0 may be fed into uncompress 4.0. - -The output of compress 4.0 not compatible with that of -compress 2.0. However, compress 4.0 still accepts the output of -compress 2.0. To generate output that is compatible with compress -2.0, use the undocumented "-C" flag. - - -from mod.sources, submitted by vax135!petsd!joe (Joe Orost), 8/1/85 --------------------------------- - -Enclosed is compress version 3.0 with the following changes: - -1. "Block" compression is performed. After the BITS run out, the - compression ratio is checked every so often. If it is decreasing, - the table is cleared and a new set of substrings are generated. - - This makes the output of compress 3.0 not compatible with that of - compress 2.0. However, compress 3.0 still accepts the output of - compress 2.0. To generate output that is compatible with compress - 2.0, use the undocumented "-C" flag. - -2. A quiet "-q" flag has been added for use by the news system. - -3. The character chaining has been deleted and the program now uses - hashing. This improves the speed of the program, especially - during decompression. Other speed improvements have been made, - such as using putc() instead of fwrite(). - -4. A large table is used on large machines when a relatively small - number of bits is specified. This saves much time when compressing - for a 16-bit machine on a 32-bit virtual machine. Note that the - speed improvement only occurs when the input file is > 30000 - characters, and the -b BITS is less than or equal to the cutoff - described below. - -Most of these changes were made by James A. Woods (ames!jaw). Thank you -James! - -To compile compress: - - cc -O -DUSERMEM=usermem -o compress compress.c - -Where "usermem" is the amount of physical user memory available (in bytes). -If any physical memory is to be reserved for other processes, put in -"-DSACREDMEM sacredmem", where "sacredmem" is the amount to be reserved. - -The difference "usermem-sacredmem" determines the maximum BITS that can be -specified, and the cutoff bits where the large+fast table is used. - -memory: at least BITS cutoff ------- -- ----- ---- ------ - 4,718,592 16 13 - 2,621,440 16 12 - 1,572,864 16 11 - 1,048,576 16 10 - 631,808 16 -- - 329,728 15 -- - 178,176 14 -- - 99,328 13 -- - 0 12 -- - -The default memory size is 750,000 which gives a maximum BITS=16 and no -large+fast table. - -The maximum bits can be overruled by specifying "-DBITS=bits" at -compilation time. - -If your machine doesn't support unsigned characters, define "NO_UCHAR" -when compiling. - -If your machine has "int" as 16-bits, define "SHORT_INT" when compiling. - -After compilation, move "compress" to a standard executable location, such -as /usr/local. Then: - cd /usr/local - ln compress uncompress - ln compress zcat - -On machines that have a fixed stack size (such as Perkin-Elmer), set the -stack to at least 12kb. ("setstack compress 12" on Perkin-Elmer). - -Next, install the manual (compress.l). - cp compress.l /usr/man/manl - cd /usr/man/manl - ln compress.l uncompress.l - ln compress.l zcat.l - - - or - - - cp compress.l /usr/man/man1/compress.1 - cd /usr/man/man1 - ln compress.1 uncompress.1 - ln compress.1 zcat.1 - - regards, - petsd!joe - -Here is a note from the net: - ->From hplabs!pesnta!amd!turtlevax!ken Sat Jan 5 03:35:20 1985 -Path: ames!hplabs!pesnta!amd!turtlevax!ken -From: ken@turtlevax.UUCP (Ken Turkowski) -Newsgroups: net.sources -Subject: Re: Compress release 3.0 : sample Makefile -Organization: CADLINC, Inc. @ Menlo Park, CA - -In the compress 3.0 source recently posted to mod.sources, there is a -#define variable which can be set for optimum performance on a machine -with a large amount of memory. A program (usermem) to calculate the -useable amount of physical user memory is enclosed, as well as a sample -4.2bsd Vax Makefile for compress. - -Here is the README file from the previous version of compress (2.0): - ->Enclosed is compress.c version 2.0 with the following bugs fixed: -> ->1. The packed files produced by compress are different on different -> machines and dependent on the vax sysgen option. -> The bug was in the different byte/bit ordering on the -> various machines. This has been fixed. -> -> This version is NOT compatible with the original vax posting -> unless the '-DCOMPATIBLE' option is specified to the C -> compiler. The original posting has a bug which I fixed, -> causing incompatible files. I recommend you NOT to use this -> option unless you already have a lot of packed files from -> the original posting by thomas. ->2. The exit status is not well defined (on some machines) causing the -> scripts to fail. -> The exit status is now 0,1 or 2 and is documented in -> compress.l. ->3. The function getopt() is not available in all C libraries. -> The function getopt() is no longer referenced by the -> program. ->4. Error status is not being checked on the fwrite() and fflush() calls. -> Fixed. -> ->The following enhancements have been made: -> ->1. Added facilities of "compact" into the compress program. "Pack", -> "Unpack", and "Pcat" are no longer required (no longer supplied). ->2. Installed work around for C compiler bug with "-O". ->3. Added a magic number header (\037\235). Put the bits specified -> in the file. ->4. Added "-f" flag to force overwrite of output file. ->5. Added "-c" flag and "zcat" program. 'ln compress zcat' after you -> compile. ->6. The 'uncompress' script has been deleted; simply -> 'ln compress uncompress' after you compile and it will work. ->7. Removed extra bit masking for machines that support unsigned -> characters. If your machine doesn't support unsigned characters, -> define "NO_UCHAR" when compiling. -> ->Compile "compress.c" with "-O -o compress" flags. Move "compress" to a ->standard executable location, such as /usr/local. Then: -> cd /usr/local -> ln compress uncompress -> ln compress zcat -> ->On machines that have a fixed stack size (such as Perkin-Elmer), set the ->stack to at least 12kb. ("setstack compress 12" on Perkin-Elmer). -> ->Next, install the manual (compress.l). -> cp compress.l /usr/man/manl - or - -> cp compress.l /usr/man/man1/compress.1 -> ->Here is the README that I sent with my first posting: -> ->>Enclosed is a modified version of compress.c, along with scripts to make it ->>run identically to pack(1), unpack(1), an pcat(1). Here is what I ->>(petsd!joe) and a colleague (petsd!peora!srd) did: ->> ->>1. Removed VAX dependencies. ->>2. Changed the struct to separate arrays; saves mucho memory. ->>3. Did comparisons in unsigned, where possible. (Faster on Perkin-Elmer.) ->>4. Sorted the character next chain and changed the search to stop ->>prematurely. This saves a lot on the execution time when compressing. ->> ->>This version is totally compatible with the original version. Even though ->>lint(1) -p has no complaints about compress.c, it won't run on a 16-bit ->>machine, due to the size of the arrays. ->> ->>Here is the README file from the original author: ->> ->>>Well, with all this discussion about file compression (for news batching ->>>in particular) going around, I decided to implement the text compression ->>>algorithm described in the June Computer magazine. The author claimed ->>>blinding speed and good compression ratios. It's certainly faster than ->>>compact (but, then, what wouldn't be), but it's also the same speed as ->>>pack, and gets better compression than both of them. On 350K bytes of ->>>unix-wizards, compact took about 8 minutes of CPU, pack took about 80 ->>>seconds, and compress (herein) also took 80 seconds. But, compact and ->>>pack got about 30% compression, whereas compress got over 50%. So, I ->>>decided I had something, and that others might be interested, too. ->>> ->>>As is probably true of compact and pack (although I haven't checked), ->>>the byte order within a word is probably relevant here, but as long as ->>>you stay on a single machine type, you should be ok. (Can anybody ->>>elucidate on this?) There are a couple of asm's in the code (extv and ->>>insv instructions), so anyone porting it to another machine will have to ->>>deal with this anyway (and could probably make it compatible with Vax ->>>byte order at the same time). Anyway, I've linted the code (both with ->>>and without -p), so it should run elsewhere. Note the longs in the ->>>code, you can take these out if you reduce BITS to <= 15. ->>> ->>>Have fun, and as always, if you make good enhancements, or bug fixes, ->>>I'd like to see them. ->>> ->>>=Spencer (thomas@utah-20, {harpo,hplabs,arizona}!utah-cs!thomas) ->> ->> regards, ->> joe ->> ->>-- ->>Full-Name: Joseph M. Orost ->>UUCP: ..!{decvax,ucbvax,ihnp4}!vax135!petsd!joe ->>US Mail: MS 313; Perkin-Elmer; 106 Apple St; Tinton Falls, NJ 07724 ->>Phone: (201) 870-5844 diff --git a/usr.bin/compress/doc/revision.log b/usr.bin/compress/doc/revision.log deleted file mode 100644 index 16ca49d..0000000 --- a/usr.bin/compress/doc/revision.log +++ /dev/null @@ -1,116 +0,0 @@ -/* - * $Hdr: compress.c,v 4.0 85/07/30 12:50:00 joe Release $ - * $Lg: compress.c,v $ - * Revision 4.0 85/07/30 12:50:00 joe - * Removed ferror() calls in output routine on every output except first. - * Prepared for release to the world. - * - * Revision 3.6 85/07/04 01:22:21 joe - * Remove much wasted storage by overlaying hash table with the tables - * used by decompress: tab_suffix[1<putc] and - * added signal catcher [plus beef in writeerr()] to delete effluvia. - * - * Revision 2.0 84/08/28 22:00:00 petsd!joe - * Add check for foreground before prompting user. Insert maxbits into - * compressed file. Force file being uncompressed to end with ".Z". - * Added "-c" flag and "zcat". Prepared for release. - * - * Revision 1.10 84/08/24 18:28:00 turtlevax!ken - * Will only compress regular files (no directories), added a magic number - * header (plus an undocumented -n flag to handle old files without headers), - * added -f flag to force overwriting of possibly existing destination file, - * otherwise the user is prompted for a response. Will tack on a .Z to a - * filename if it doesn't have one when decompressing. Will only replace - * file if it was compressed. - * - * Revision 1.9 84/08/16 17:28:00 turtlevax!ken - * Removed scanargs(), getopt(), added .Z extension and unlimited number of - * filenames to compress. Flags may be clustered (-Ddvb12) or separated - * (-D -d -v -b 12), or combination thereof. Modes and other status is - * copied with copystat(). -O bug for 4.2 seems to have disappeared with - * 1.8. - * - * Revision 1.8 84/08/09 23:15:00 joe - * Made it compatible with vax version, installed jim's fixes/enhancements - * - * Revision 1.6 84/08/01 22:08:00 joe - * Sped up algorithm significantly by sorting the compress chain. - * - * Revision 1.5 84/07/13 13:11:00 srd - * Added C version of vax asm routines. Changed structure to arrays to - * save much memory. Do unsigned compares where possible (faster on - * Perkin-Elmer) - * - * Revision 1.4 84/07/05 03:11:11 thomas - * Clean up the code a little and lint it. (Lint complains about all - * the regs used in the asm, but I'm not going to "fix" this.) - * - * Revision 1.3 84/07/05 02:06:54 thomas - * Minor fixes. - * - * Revision 1.2 84/07/05 00:27:27 thomas - * Add variable bit length output. - * - */ - -static char rcs_ident[] = - "$Hdr: compress.c,v 4.0 85/07/30 12:50:00 joe Release $"; diff --git a/usr.bin/compress/zopen.3 b/usr.bin/compress/zopen.3 deleted file mode 100644 index 32be9c4..0000000 --- a/usr.bin/compress/zopen.3 +++ /dev/null @@ -1,138 +0,0 @@ -.\" $NetBSD: zopen.3,v 1.7 2003/08/07 11:13:29 agc Exp $ -.\" -.\" Copyright (c) 1992, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" @(#)zopen.3 8.1 (Berkeley) 6/9/93 -.\" -.Dd June 9, 1993 -.Dt ZOPEN 3 -.Os -.Sh NAME -.Nm zopen -.Nd compressed stream open function -.Sh SYNOPSIS -.In stdio.h -.Ft FILE * -.Fn zopen "const char *path" "const char *mode" "int bits" -.Sh DESCRIPTION -The -.Fn zopen -function -opens the compressed file whose name is the string pointed to by -.Fa path -and associates a stream with it. -.Pp -The argument -.Fa mode -points to one of the following one-character strings: -.Bl -tag -width indent -.It Dq Li r -Open compressed file for reading. -The stream is positioned at the beginning of the file. -.It Dq Li w -Truncate file to zero length or create compressed file for writing. -The stream is positioned at the beginning of the file. -.El -.Pp -Any created files will have mode -.Pf \\*q Dv S_IRUSR -\&| -.Dv S_IWUSR -\&| -.Dv S_IRGRP -\&| -.Dv S_IWGRP -\&| -.Dv S_IROTH -\&| -.Dv S_IWOTH Ns \\*q -.Pq Li 0666 , -as modified by the process' -umask value (see -.Xr umask 2 ) . -.Pp -Files may only be read or written. -Seek operations are not allowed. -.Pp -The -.Fa bits -argument, if non-zero, is set to the bits code limit. -If zero, the default is 16. -See -.Fn compress 1 -for more information. -.Sh RETURN VALUES -Upon successful completion -.Fn zopen -returns a -.Tn FILE -pointer. -Otherwise, -.Dv NULL -is returned and the global variable -.Va errno -is set to indicate the error. -.Sh ERRORS -.Bl -tag -width [EINVAL] -.It Bq Er EINVAL -The -.Fa mode -or -.Fa bits -arguments specified to -.Fn zopen -were invalid. -.It Bq Er EFTYPE -The compressed file starts with an invalid header, or the compressed -file is compressed with more bits than can be handled. -.El -.Pp -The -.Fn zopen -function may also fail and set -.Va errno -for any of the errors specified for the routines -.Xr fopen 3 -or -.Xr funopen 3 . -.Sh SEE ALSO -.Xr compress 1 , -.Xr fopen 3 , -.Xr funopen 3 -.Sh HISTORY -The -.Nm zopen -function -first appeared in -.Bx 4.4 . -.Sh BUGS -The -.Fn zopen -function -may not be portable to systems other than -.Bx . diff --git a/usr.bin/compress/zopen.c b/usr.bin/compress/zopen.c deleted file mode 100644 index 0d3f178..0000000 --- a/usr.bin/compress/zopen.c +++ /dev/null @@ -1,699 +0,0 @@ -/* $NetBSD: zopen.c,v 1.15 2011/08/16 13:55:01 joerg Exp $ */ - -/*- - * Copyright (c) 1985, 1986, 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Diomidis Spinellis and James A. Woods, derived from original - * work by Spencer Thomas and Joseph Orost. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -#if 0 -static char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; -#else -static char rcsid[] = "$NetBSD: zopen.c,v 1.15 2011/08/16 13:55:01 joerg Exp $"; -#endif -#endif /* LIBC_SCCS and not lint */ - -/*- - * fcompress.c - File compression ala IEEE Computer, June 1984. - * - * Compress authors: - * Spencer W. Thomas (decvax!utah-cs!thomas) - * Jim McKie (decvax!mcvax!jim) - * Steve Davies (decvax!vax135!petsd!peora!srd) - * Ken Turkowski (decvax!decwrl!turtlevax!ken) - * James A. Woods (decvax!ihnp4!ames!jaw) - * Joe Orost (decvax!vax135!petsd!joe) - * - * Cleaned up and converted to library returning I/O streams by - * Diomidis Spinellis . - * - * zopen(filename, mode, bits) - * Returns a FILE * that can be used for read or write. The modes - * supported are only "r" and "w". Seeking is not allowed. On - * reading the file is decompressed, on writing it is compressed. - * The output is compatible with compress(1) with 16 bit tables. - * Any file produced by compress(1) can be read. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define BITS 16 /* Default bits. */ -#define HSIZE 69001 /* 95% occupancy */ - -/* A code_int must be able to hold 2**BITS values of type int, and also -1. */ -typedef long code_int; -typedef long count_int; - -typedef u_char char_type; -static char_type magic_header[] = - {'\037', '\235'}; /* 1F 9D */ - -#define BIT_MASK 0x1f /* Defines for third byte of header. */ -#define BLOCK_MASK 0x80 - -/* - * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is - * a fourth header byte (for expansion). - */ -#define INIT_BITS 9 /* Initial number of bits/code. */ - -#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) - -struct s_zstate { - FILE *zs_fp; /* File stream for I/O */ - char zs_mode; /* r or w */ - enum { - S_START, S_MIDDLE, S_EOF - } zs_state; /* State of computation */ - int zs_n_bits; /* Number of bits/code. */ - int zs_maxbits; /* User settable max # bits/code. */ - code_int zs_maxcode; /* Maximum code, given n_bits. */ - code_int zs_maxmaxcode; /* Should NEVER generate this code. */ - count_int zs_htab [HSIZE]; - u_short zs_codetab [HSIZE]; - code_int zs_hsize; /* For dynamic table sizing. */ - code_int zs_free_ent; /* First unused entry. */ - /* - * Block compression parameters -- after all codes are used up, - * and compression rate changes, start over. - */ - int zs_block_compress; - int zs_clear_flg; - long zs_ratio; - count_int zs_checkpoint; - int zs_offset; - long zs_in_count; /* Length of input. */ - long zs_bytes_out; /* Length of compressed output. */ - long zs_out_count; /* # of codes output (for debugging). */ - char_type zs_buf[BITS]; - union { - struct { - long zs_fcode; - code_int zs_ent; - code_int zs_hsize_reg; - int zs_hshift; - } w; /* Write paramenters */ - struct { - char_type *zs_stackp; - int zs_finchar; - code_int zs_code, zs_oldcode, zs_incode; - int zs_roffset, zs_size; - char_type zs_gbuf[BITS]; - } r; /* Read parameters */ - } u; -}; - -/* Definitions to retain old variable names */ -#define fp zs->zs_fp -#define zmode zs->zs_mode -#define state zs->zs_state -#define n_bits zs->zs_n_bits -#define maxbits zs->zs_maxbits -#define maxcode zs->zs_maxcode -#define maxmaxcode zs->zs_maxmaxcode -#define htab zs->zs_htab -#define codetab zs->zs_codetab -#define hsize zs->zs_hsize -#define free_ent zs->zs_free_ent -#define block_compress zs->zs_block_compress -#define clear_flg zs->zs_clear_flg -#define ratio zs->zs_ratio -#define checkpoint zs->zs_checkpoint -#define offset zs->zs_offset -#define in_count zs->zs_in_count -#define bytes_out zs->zs_bytes_out -#define out_count zs->zs_out_count -#define buf zs->zs_buf -#define fcode zs->u.w.zs_fcode -#define hsize_reg zs->u.w.zs_hsize_reg -#define ent zs->u.w.zs_ent -#define hshift zs->u.w.zs_hshift -#define stackp zs->u.r.zs_stackp -#define finchar zs->u.r.zs_finchar -#define code zs->u.r.zs_code -#define oldcode zs->u.r.zs_oldcode -#define incode zs->u.r.zs_incode -#define roffset zs->u.r.zs_roffset -#define size zs->u.r.zs_size -#define gbuf zs->u.r.zs_gbuf - -/* - * To save much memory, we overlay the table used by compress() with those - * used by decompress(). The tab_prefix table is the same size and type as - * the codetab. The tab_suffix table needs 2**BITS characters. We get this - * from the beginning of htab. The output stack uses the rest of htab, and - * contains characters. There is plenty of room for any possible stack - * (stack used to be 8000 characters). - */ - -#define htabof(i) htab[i] -#define codetabof(i) codetab[i] - -#define tab_prefixof(i) codetabof(i) -#define tab_suffixof(i) ((char_type *)(htab))[i] -#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) - -#define CHECK_GAP 10000 /* Ratio check interval. */ - -/* - * the next two codes should not be changed lightly, as they must not - * lie within the contiguous general code space. - */ -#define FIRST 257 /* First free entry. */ -#define CLEAR 256 /* Table clear output code. */ - -static int cl_block(struct s_zstate *); -static code_int getcode(struct s_zstate *); -static int output(struct s_zstate *, code_int); -static int zclose(void *); -FILE *zopen(const char *, const char *, int); -static int zread(void *, char *, int); -static int zwrite(void *, const char *, int); - -/*- - * Algorithm from "A Technique for High Performance Data Compression", - * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. - * - * Algorithm: - * Modified Lempel-Ziv method (LZW). Basically finds common - * substrings and replaces them with a variable size code. This is - * deterministic, and can be done on the fly. Thus, the decompression - * procedure needs no input table, but tracks the way the table was built. - */ - -/*- - * compress write - * - * Algorithm: use open addressing double hashing (no chaining) on the - * prefix code / next character combination. We do a variant of Knuth's - * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime - * secondary probe. Here, the modular division first probe is gives way - * to a faster exclusive-or manipulation. Also do block compression with - * an adaptive reset, whereby the code table is cleared when the compression - * ratio decreases, but after the table fills. The variable-length output - * codes are re-sized at this point, and a special CLEAR code is generated - * for the decompressor. Late addition: construct the table according to - * file size for noticeable speed improvement on small files. Please direct - * questions about this implementation to ames!jaw. - */ -static int -zwrite(void *cookie, const char *wbp, int num) -{ - code_int i; - int c, disp; - struct s_zstate *zs; - const u_char *bp; - u_char tmp; - int count; - - if (num == 0) - return (0); - - zs = cookie; - count = num; - bp = (const u_char *)wbp; - if (state == S_MIDDLE) - goto middle; - state = S_MIDDLE; - - maxmaxcode = 1L << maxbits; - if (fwrite(magic_header, - sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) - return (-1); - tmp = (u_char)(maxbits | block_compress); - if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) - return (-1); - - offset = 0; - bytes_out = 3; /* Includes 3-byte header mojo. */ - out_count = 0; - clear_flg = 0; - ratio = 0; - in_count = 1; - checkpoint = CHECK_GAP; - maxcode = MAXCODE(n_bits = INIT_BITS); - free_ent = ((block_compress) ? FIRST : 256); - - ent = *bp++; - --count; - - hshift = 0; - for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) - hshift++; - hshift = 8 - hshift; /* Set hash code range bound. */ - - hsize_reg = hsize; - memset(htab, 0xff, hsize_reg * sizeof(count_int)); - -middle: while (count--) { - c = *bp++; - in_count++; - fcode = (long)(((long)c << maxbits) + ent); - i = ((c << hshift) ^ ent); /* Xor hashing. */ - - if (htabof(i) == fcode) { - ent = codetabof(i); - continue; - } else if ((long)htabof(i) < 0) /* Empty slot. */ - goto nomatch; - disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ - if (i == 0) - disp = 1; -probe: if ((i -= disp) < 0) - i += hsize_reg; - - if (htabof(i) == fcode) { - ent = codetabof(i); - continue; - } - if ((long)htabof(i) >= 0) - goto probe; -nomatch: if (output(zs, (code_int) ent) == -1) - return (-1); - out_count++; - ent = c; - if (free_ent < maxmaxcode) { - codetabof(i) = free_ent++; /* code -> hashtable */ - htabof(i) = fcode; - } else if ((count_int)in_count >= - checkpoint && block_compress) { - if (cl_block(zs) == -1) - return (-1); - } - } - return (num); -} - -static int -zclose(void *cookie) -{ - struct s_zstate *zs; - int rval; - - zs = cookie; - if (zmode == 'w') { /* Put out the final code. */ - if (output(zs, (code_int) ent) == -1) { - (void)fclose(fp); - free(zs); - return (-1); - } - out_count++; - if (output(zs, (code_int) - 1) == -1) { - (void)fclose(fp); - free(zs); - return (-1); - } - } - rval = fclose(fp) == EOF ? -1 : 0; - free(zs); - return (rval); -} - -/*- - * Output the given code. - * Inputs: - * code: A n_bits-bit integer. If == -1, then EOF. This assumes - * that n_bits =< (long)wordsize - 1. - * Outputs: - * Outputs code to the file. - * Assumptions: - * Chars are 8 bits long. - * Algorithm: - * Maintain a BITS character long buffer (so that 8 codes will - * fit in it exactly). Use the VAX insv instruction to insert each - * code in turn. When the buffer fills up empty it and start over. - */ - -static char_type lmask[9] = - {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; -static char_type rmask[9] = - {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; - -static int -output(struct s_zstate *zs, code_int ocode) -{ - int bits, r_off; - char_type *bp; - - r_off = offset; - bits = n_bits; - bp = buf; - if (ocode >= 0) { - /* Get to the first byte. */ - bp += (r_off >> 3); - r_off &= 7; - /* - * Since ocode is always >= 8 bits, only need to mask the first - * hunk on the left. - */ - *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); - bp++; - bits -= (8 - r_off); - ocode >>= 8 - r_off; - /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ - if (bits >= 8) { - *bp++ = ocode; - ocode >>= 8; - bits -= 8; - } - /* Last bits. */ - if (bits) - *bp = ocode; - offset += n_bits; - if (offset == (n_bits << 3)) { - bp = buf; - bits = n_bits; - bytes_out += bits; - if (fwrite(bp, sizeof(char), bits, fp) != (size_t)bits) - return (-1); - bp += bits; - bits = 0; - offset = 0; - } - /* - * If the next entry is going to be too big for the ocode size, - * then increase it, if possible. - */ - if (free_ent > maxcode || (clear_flg > 0)) { - /* - * Write the whole buffer, because the input side won't - * discover the size increase until after it has read it. - */ - if (offset > 0) { - if (fwrite(buf, 1, n_bits, fp) != (size_t)n_bits) - return (-1); - bytes_out += n_bits; - } - offset = 0; - - if (clear_flg) { - maxcode = MAXCODE(n_bits = INIT_BITS); - clear_flg = 0; - } else { - n_bits++; - if (n_bits == maxbits) - maxcode = maxmaxcode; - else - maxcode = MAXCODE(n_bits); - } - } - } else { - /* At EOF, write the rest of the buffer. */ - if (offset > 0) { - offset = (offset + 7) / 8; - if (fwrite(buf, 1, offset, fp) != (size_t)offset) - return (-1); - bytes_out += offset; - } - offset = 0; - } - return (0); -} - -/* - * Decompress read. This routine adapts to the codes in the file building - * the "string" table on-the-fly; requiring no table to be stored in the - * compressed file. The tables used herein are shared with those of the - * compress() routine. See the definitions above. - */ -static int -zread(void *cookie, char *rbp, int num) -{ - u_int count; - struct s_zstate *zs; - u_char *bp, header[3]; - - if (num == 0) - return (0); - - zs = cookie; - count = num; - bp = (u_char *)rbp; - switch (state) { - case S_START: - state = S_MIDDLE; - break; - case S_MIDDLE: - goto middle; - case S_EOF: - goto eof; - } - - /* Check the magic number */ - if (fread(header, - sizeof(char), sizeof(header), fp) != sizeof(header) || - memcmp(header, magic_header, sizeof(magic_header)) != 0) { - errno = EFTYPE; - return (-1); - } - maxbits = header[2]; /* Set -b from file. */ - block_compress = maxbits & BLOCK_MASK; - maxbits &= BIT_MASK; - maxmaxcode = 1L << maxbits; - if (maxbits > BITS || maxbits < 12) { - errno = EFTYPE; - return (-1); - } - /* As above, initialize the first 256 entries in the table. */ - maxcode = MAXCODE(n_bits = INIT_BITS); - for (code = 255; code >= 0; code--) { - tab_prefixof(code) = 0; - tab_suffixof(code) = (char_type) code; - } - free_ent = block_compress ? FIRST : 256; - oldcode = -1; - stackp = de_stack; - - while ((code = getcode(zs)) > -1) { - - if ((code == CLEAR) && block_compress) { - for (code = 255; code >= 0; code--) - tab_prefixof(code) = 0; - clear_flg = 1; - free_ent = FIRST; - oldcode = -1; - continue; - } - incode = code; - - /* Special case for kWkWk string. */ - if (code >= free_ent) { - if (code > free_ent || oldcode == -1) { - /* Bad stream. */ - errno = EINVAL; - return (-1); - } - *stackp++ = finchar; - code = oldcode; - } - /* - * The above condition ensures that code < free_ent. - * The construction of tab_prefixof in turn guarantees that - * each iteration decreases code and therefore stack usage is - * bound by 1 << BITS - 256. - */ - - /* Generate output characters in reverse order. */ - while (code >= 256) { - *stackp++ = tab_suffixof(code); - code = tab_prefixof(code); - } - *stackp++ = finchar = tab_suffixof(code); - - /* And put them out in forward order. */ -middle: do { - if (count-- == 0) - return (num); - *bp++ = *--stackp; - } while (stackp > de_stack); - - /* Generate the new entry. */ - if ((code = free_ent) < maxmaxcode && oldcode != -1) { - tab_prefixof(code) = (u_short) oldcode; - tab_suffixof(code) = finchar; - free_ent = code + 1; - } - - /* Remember previous code. */ - oldcode = incode; - } - state = S_EOF; -eof: return (num - count); -} - -/*- - * Read one code from the standard input. If EOF, return -1. - * Inputs: - * stdin - * Outputs: - * code or -1 is returned. - */ -static code_int -getcode(struct s_zstate *zs) -{ - code_int gcode; - int r_off, bits; - char_type *bp; - - bp = gbuf; - if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { - /* - * If the next entry will be too big for the current gcode - * size, then we must increase the size. This implies reading - * a new buffer full, too. - */ - if (free_ent > maxcode) { - n_bits++; - if (n_bits == maxbits) /* Won't get any bigger now. */ - maxcode = maxmaxcode; - else - maxcode = MAXCODE(n_bits); - } - if (clear_flg > 0) { - maxcode = MAXCODE(n_bits = INIT_BITS); - clear_flg = 0; - } - size = fread(gbuf, 1, n_bits, fp); - if (size <= 0) /* End of file. */ - return (-1); - roffset = 0; - /* Round size down to integral number of codes. */ - size = (size << 3) - (n_bits - 1); - } - r_off = roffset; - bits = n_bits; - - /* Get to the first byte. */ - bp += (r_off >> 3); - r_off &= 7; - - /* Get first part (low order bits). */ - gcode = (*bp++ >> r_off); - bits -= (8 - r_off); - r_off = 8 - r_off; /* Now, roffset into gcode word. */ - - /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ - if (bits >= 8) { - gcode |= *bp++ << r_off; - r_off += 8; - bits -= 8; - } - - /* High order bits. */ - gcode |= (*bp & rmask[bits]) << r_off; - roffset += n_bits; - - return (gcode); -} - -static int -cl_block(struct s_zstate *zs) /* Table clear for block compress. */ -{ - long rat; - - checkpoint = in_count + CHECK_GAP; - - if (in_count > 0x007fffff) { /* Shift will overflow. */ - rat = bytes_out >> 8; - if (rat == 0) /* Don't divide by zero. */ - rat = 0x7fffffff; - else - rat = in_count / rat; - } else - rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ - if (rat > ratio) - ratio = rat; - else { - ratio = 0; - memset(htab, 0xff, hsize * sizeof(count_int)); - free_ent = FIRST; - clear_flg = 1; - if (output(zs, (code_int) CLEAR) == -1) - return (-1); - } - return (0); -} - -FILE * -zopen(const char *fname, const char *mode, int bits) -{ - struct s_zstate *zs; - - if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || - bits < 0 || bits > BITS) { - errno = EINVAL; - return (NULL); - } - - if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) - return (NULL); - - maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ - maxmaxcode = 1 << maxbits; /* Should NEVER generate this code. */ - hsize = HSIZE; /* For dynamic table sizing. */ - free_ent = 0; /* First unused entry. */ - block_compress = BLOCK_MASK; - clear_flg = 0; - ratio = 0; - checkpoint = CHECK_GAP; - in_count = 1; /* Length of input. */ - out_count = 0; /* # of codes output (for debugging). */ - state = S_START; - roffset = 0; - size = 0; - - /* - * Layering compress on top of stdio in order to provide buffering, - * and ensure that reads and write work with the data specified. - */ - if ((fp = fopen(fname, mode)) == NULL) { - free(zs); - return (NULL); - } - switch (*mode) { - case 'r': - zmode = 'r'; - return (funopen(zs, zread, NULL, NULL, zclose)); - case 'w': - zmode = 'w'; - return (funopen(zs, NULL, zwrite, NULL, zclose)); - } - /* NOTREACHED */ - return (NULL); -} -- cgit v1.2.3-60-g2f50