diff options
Diffstat (limited to 'binsub.c')
-rw-r--r-- | binsub.c | 277 |
1 files changed, 277 insertions, 0 deletions
/**
 * binsub.c / 2022-12-09
 *
 * (C) 2022 Zach van Rijn <me@zv.io>
 *
 * MIT License
 *
 * This utility truncates or replaces needles in an input file;
 * truncation meaning the replacement string is empty.
 *
 * Replacement string length must be less than or equal to that
 * of the needle because the file length must remain unchanged.
 *
 * For efficient operation, consider deploying this on a '.tar'
 * file instead of individual files within a directory.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/**
 * Basic memory structure.
 *
 * 'data' holds 'len' bytes of file content. The functions below
 * additionally require one extra byte, 'data[len]', set to nil,
 * so that the string functions can never run past the buffer.
 */
struct buffer
{
    char *data;
    size_t len;
};


/**
 * Given a needle ('find') and optional replacement ('repl'), if
 * the needle is found, truncate it, inject the replacement, and
 * pad the tail end of the matching string with null bytes.
 *
 * The file length remains the same, and we hope that nobody is
 * relying on precomputed offsets into the strings. Mega kludge!
 *
 * Replacement = "":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^          shift data           ^
 *            +-------------------------------+
 *
 * Replacement = "FOO":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|F|O|O|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^ FOO ^          shift data           ^
 *            +=====+-------------------------------+
 *
 * Parameters:
 *
 *   buf   buffer to edit in place; 'buf->data' must provide
 *         'buf->len + 1' bytes with 'buf->data[buf->len]' nil.
 *   find  needle; must be a non-empty string.
 *   repl  replacement, or NULL (same as "") to truncate; must
 *         not be longer than 'find'.
 *
 * If any of these requirements is violated the buffer is left
 * untouched. One progress line is printed to stdout per match.
 *
 * Scanning resumes immediately after the injected replacement,
 * so replaced text is never re-examined and the result does not
 * depend on where in the string the previous match was found.
 */
void
replace (struct buffer *buf, const char *find, const char *repl)
{
    size_t nlen;         /* length of needle               */
    size_t rlen;         /* length of replacement          */
    size_t idx;          /* cursor into file buffer        */

    if (!buf || !buf->data || !find)
    {
        return;
    }

    nlen = strlen(find);
    rlen = repl ? strlen(repl) : 0;

    /**
     * An empty needle "matches" at every byte and can never be
     * removed; a replacement longer than the needle would make
     * 'nlen - rlen' wrap around and overrun the string.
     */
    if (nlen == 0 || rlen > nlen)
    {
        return;
    }

    idx = 0;
    while (idx < buf->len)
    {
        char *cur = buf->data + idx;  /* start of search window */
        char *match;                  /* pointer to found needle */
        size_t off;                   /* offset of match in file */
        size_t mlen;                  /* needle + rest of string */

        /**
         * Does the current string contain the needle?
         */
        match = strstr(cur, find);
        if (!match)
        {
            /**
             * Nothing (more) to do in this string: jump over it
             * and over its terminating nil byte.
             */
            idx += strlen(cur) + 1;
            continue;
        }

        off = (size_t) (match - buf->data);
        mlen = strlen(match);

        /**
         * 'buf->len' is non-zero here because 'idx < buf->len'.
         */
        printf("%10zu bytes at offset 0x%010zx (%02zu%%)\n",
            mlen,
            off,
            (100 * off) / buf->len
        );

        /**
         * The replacement string is l.e. the length of the
         * needle, so if it is non-empty, inject it first.
         */
        if (rlen)
        {
            memcpy(match, repl, rlen);
        }

        /**
         * Copy the non-needle remainder of the string to just
         * behind the injected replacement (regions may overlap,
         * hence 'memmove'), then zero out the freed-up tail.
         */
        memmove(match + rlen, match + nlen, mlen - nlen);
        memset(match + mlen - nlen + rlen, 0, nlen - rlen);

        /**
         * Continue behind the replacement; further needles in
         * the shifted remainder are found on the next pass.
         */
        idx = off + rlen;
    }
}


/**
 * Read the contents of a file into a newly allocated buffer,
 * run 'replace()' over it, and write the buffer back to the
 * same path. It is possible to 'mmap()', but it leaves less
 * room for checks.
 *
 * Parameters:
 *
 *   file  path of the file to rewrite in place.
 *   find  needle, passed through to 'replace()'.
 *   repl  replacement or NULL, passed through to 'replace()'.
 *
 * Returns 0 on success and -1 on any failure; a diagnostic is
 * printed to stderr in the failure case. All resources acquired
 * here are released on every path.
 */
int
scanner (const char *file, const char *find, const char *repl)
{
    FILE *fp = NULL;
    long end = 0;
    size_t nb = 0;
    int rc = -1;

    struct buffer buf;
    memset(&buf, 0, sizeof(struct buffer));

    /**
     * Opened in update mode although this stream is only read
     * from; presumably so an unwritable file fails early.
     */
    fp = fopen(file, "rb+");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for reading!\n",
            file
        );
        goto done;
    }

    /**
     * 'ftell()' reports errors as -1; using that as a length
     * would request a zero-byte allocation and then write the
     * terminator far outside of it.
     */
    if (fseek(fp, 0, SEEK_END) != 0
        || (end = ftell(fp)) < 0
        || fseek(fp, 0, SEEK_SET) != 0)
    {
        fprintf(stderr,
            "E: Could not determine size of file '%s'\n",
            file
        );
        goto done;
    }

    /**
     * 'buf.len + 1' below must not wrap around.
     */
    if ((unsigned long) end >= (size_t) -1)
    {
        fprintf(stderr,
            "E: File '%s' is too large\n",
            file
        );
        goto done;
    }
    buf.len = (size_t) end;

    /**
     * Allocate memory for the entire file at once. This is not
     * ideal, but we don't expect large files for our use case.
     */
    buf.data = malloc(buf.len + 1);
    if (!buf.data)
    {
        fprintf(stderr,
            "E: Could not allocate '%zu' bytes for file '%s'\n",
            buf.len,
            file
        );
        goto done;
    }
    buf.data[buf.len] = 0; /* extra byte needs to be nil */

    nb = fread(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only read '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }
    fclose(fp);
    fp = NULL;

    printf("Examining file '%s'...\n", file);
    replace(&buf, find, repl);

    fp = fopen(file, "wb");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for writing!\n",
            file
        );
        goto done;
    }

    nb = fwrite(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only wrote '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }

    /**
     * Buffered data is flushed on close, so a failure here
     * means the file on disk may be incomplete.
     */
    nb = (fclose(fp) == 0);
    fp = NULL;
    if (!nb)
    {
        fprintf(stderr,
            "E: Could not finish writing file '%s'\n",
            file
        );
        goto done;
    }

    rc = 0;

done:
    if (fp)
    {
        fclose(fp);
    }
    free(buf.data);

    return rc;
}


/**
 * WARNING!
 *
 * This program replaces all occurrences of NEEDLE within string
 * sections of an input file with the string REPLACE. The input
 * file is overwritten. Few, if any, sanity checks are in place.
 *
 * Usage: binsub FILE NEEDLE [REPLACE]
 *
 * Exit status is 0 on success and 1 on a usage error or when
 * the file could not be processed.
 */
int
main (int argc, char **argv)
{
    const char *prog = NULL;
    const char *file = NULL;
    const char *find = NULL;
    const char *repl = NULL;

    /* 'argv[0]' may legitimately be absent */
    prog = (argc > 0 && argv[0]) ? argv[0] : "binsub";

    switch (argc)
    {
    case 3:
        file = argv[1];
        find = argv[2];
        break;
    case 4:
        file = argv[1];
        find = argv[2];
        repl = argv[3];
        if (strlen(repl) > strlen(find))
        {
            fprintf(stderr,
                "E: REPLACE cannot be longer than NEEDLE\n"
            );
            return 1;
        }
        break;
    default:
        fprintf(stderr,
            "Usage: %s FILE NEEDLE [REPLACE]\n",
            prog
        );
        return 1;
    }

    /**
     * An empty needle can be neither removed nor replaced, so
     * refuse it before the input file is touched.
     */
    if (find[0] == '\0')
    {
        fprintf(stderr,
            "E: NEEDLE cannot be empty\n"
        );
        return 1;
    }

    return (scanner(file, find, repl) == 0) ? 0 : 1;
}