/**
 * binsub.c / 2022-12-09
 *
 * (C) 2022 Zach van Rijn
 *
 * MIT License
 *
 * This utility truncates or replaces needles in an input file;
 * truncation meaning the replacement string is empty.
 *
 * Replacement string length must be less than or equal to that
 * of the needle because the file length must remain unchanged.
 *
 * For efficient operation, consider deploying this on a '.tar'
 * file instead of individual files within a directory.
 *
 * Define BINSUB_NO_MAIN when compiling to leave out 'main()' so
 * that 'replace()' and 'scanner()' can be linked into another
 * program or a test harness.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/**
 * Basic memory structure.
 *
 * 'data' points to 'len' bytes of file content. The routines in
 * this file additionally require one extra, zero-valued byte at
 * 'data[len]' so that the last string in the buffer is always
 * terminated.
 */
struct buffer
{
    char *data;
    size_t len;
};


/**
 * Given a needle ('find') and optional replacement ('repl'), if
 * the needle is found, truncate it, inject the replacement, and
 * pad the tail end of the matching string with null bytes.
 *
 * The file length remains the same, and we hope that nobody is
 * relying on precomputed offsets into the strings. Mega kludge!
 *
 * Replacement = "":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^          shift data           ^
 *            +-------------------------------+
 *
 * Replacement = "FOO":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|F|O|O|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^ FOO ^          shift data           ^
 *            +=====+-------------------------------+
 *
 * Parameters:
 *
 *   buf   buffer to edit in place; 'buf->data[buf->len]' must
 *         be a zero byte (see 'struct buffer').
 *   find  needle; must be a non-empty string.
 *   repl  replacement, or NULL for pure truncation; must not
 *         be longer than 'find'.
 *
 * Invalid arguments are reported on stderr and leave the buffer
 * untouched. Each match is reported on stdout.
 */
void
replace (struct buffer *buf, const char *find, const char *repl)
{
    size_t idx;                         /* cursor into file buffer    */
    size_t nlen;                        /* length of needle           */
    size_t rlen;                        /* length of replacement      */

    if (!buf || !buf->data || !find)
    {
        return;
    }

    nlen = strlen(find);
    rlen = repl ? strlen(repl) : 0;

    /**
     * An empty needle "matches" at every offset, which would do
     * nothing useful besides flooding the output.
     */
    if (nlen == 0)
    {
        fprintf(stderr, "E: NEEDLE cannot be empty\n");
        return;
    }

    /**
     * The tail padding below is 'nlen - rlen' bytes; a longer
     * replacement would make that unsigned value wrap around.
     */
    if (rlen > nlen)
    {
        fprintf(stderr, "E: REPLACE cannot be longer than NEEDLE\n");
        return;
    }

    /**
     * Iterate over each character in the current string in the
     * buffer, because multiple full matches may be possible. If
     * we cannot find a match in the current string, skip to the
     * next string.
     */
    for (idx = 0; idx < buf->len; idx++)
    {
        char *match;                    /* pointer to found needle    */
        size_t mlen;                    /* length of matching string  */
        size_t off;                     /* offset of match in buffer  */

        /**
         * Does the current string contain the needle?
         */
        match = strstr(buf->data + idx, find);

        if (!match)
        {
            /**
             * This is a partial optimization. Don't bother with
             * searching for needles in the rest of this string;
             * we already know none exist. The loop increment
             * then steps over the terminating null byte.
             */
            idx += strlen(buf->data + idx);
            continue;
        }

        /**
         * How long is the current string, counted from the
         * start of the match up to its terminator?
         */
        mlen = strlen(match);
        off = (size_t) (match - buf->data);

        printf("%10zu bytes at offset 0x%010zx (%02zu%%)\n",
            mlen, off, (100 * off) / buf->len
        );

        /**
         * The replacement string is l.e. the length of the
         * needle, so if it is non-empty, inject it first.
         */
        if (rlen > 0)
        {
            memcpy(match, repl, rlen);
        }

        /**
         * Copy the non-needle string remainder to the current
         * matched (needle) location plus offset of any injected
         * replacement. Zero out the tail.
         */
        memmove(match + rlen, match + nlen, mlen - nlen);
        memset(match + rlen + (mlen - nlen), 0, nlen - rlen);
    }
}


/**
 * Read the contents of a file into a newly allocated buffer,
 * run 'replace()' over it, and write the buffer back to the
 * same path. It is possible to 'mmap()', but it leaves less
 * room for checks.
 *
 * Failures are reported on stderr; nothing is returned to the
 * caller. The buffer is released on every path.
 */
void
scanner (const char *file, const char *find, const char *repl)
{
    FILE *fp;
    long size;
    size_t nb;
    struct buffer buf = { NULL, 0 };

    if (!file || !find)
    {
        return;
    }

    /**
     * Opened for update so that a file we cannot write to is
     * rejected before any work is done.
     */
    fp = fopen(file, "rb+");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for reading!\n", file
        );
        return;
    }

    size = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        size = ftell(fp);
    }
    if (size < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        fclose(fp);
        fprintf(stderr,
            "E: Could not determine size of file '%s'\n", file
        );
        return;
    }
    buf.len = (size_t) size;

    /**
     * Allocate memory for the entire file at once. This is not
     * ideal, but we don't expect large files for our use case.
     */
    buf.data = malloc(buf.len + 1);
    if (!buf.data)
    {
        fclose(fp);
        fprintf(stderr,
            "E: Could not allocate '%zu' bytes for file '%s'\n",
            buf.len, file
        );
        return;
    }
    buf.data[buf.len] = 0;              /* extra byte needs to be nil */

    nb = fread(buf.data, 1, buf.len, fp);
    fclose(fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only read '%zu' / '%zu' bytes of file '%s'\n",
            nb, buf.len, file
        );
        goto done;
    }

    printf("Examining file '%s'...\n", file);
    replace(&buf, find, repl);

    fp = fopen(file, "wb");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for writing!\n", file
        );
        goto done;
    }

    nb = fwrite(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fclose(fp);
        fprintf(stderr,
            "E: Only wrote '%zu' / '%zu' bytes of file '%s'\n",
            nb, buf.len, file
        );
        goto done;
    }

    /**
     * Buffered data is flushed on close, so a failure here
     * means the file on disk may be incomplete.
     */
    if (fclose(fp) != 0)
    {
        fprintf(stderr,
            "E: Could not finish writing file '%s'\n", file
        );
    }

done:
    free(buf.data);
}


#ifndef BINSUB_NO_MAIN
/**
 * WARNING!
 *
 * This program replaces all occurrences of NEEDLE within string
 * sections of an input file with the string REPLACE. The input
 * file is overwritten. Few, if any, sanity checks are in place.
 *
 * Returns 1 on a usage error and 0 otherwise; 'scanner()' does
 * not report failures back, so I/O errors only show on stderr.
 */
int
main (int argc, char **argv)
{
    const char *prog = "binsub";
    char *file = NULL;
    char *find = NULL;
    char *repl = NULL;

    if (argc > 0 && argv[0])
    {
        prog = argv[0];
    }

    switch (argc)
    {
    case 4:
        repl = argv[3];
        /* fallthrough */
    case 3:
        file = argv[1];
        find = argv[2];
        break;
    default:
        fprintf(stderr,
            "Usage: %s FILE NEEDLE [REPLACE]\n", prog
        );
        return 1;
    }

    if (find[0] == '\0')
    {
        fprintf(stderr, "E: NEEDLE cannot be empty\n");
        return 1;
    }

    if (repl && strlen(repl) > strlen(find))
    {
        fprintf(stderr,
            "E: REPLACE cannot be longer than NEEDLE\n"
        );
        return 1;
    }

    scanner(file, find, repl);

    return 0;
}
#endif /* BINSUB_NO_MAIN */