summary refs log tree commit diff
path: root/binsub.c
diff options
context:
space:
mode:
Diffstat (limited to 'binsub.c')
-rw-r--r-- binsub.c 277
1 files changed, 277 insertions, 0 deletions
diff --git a/binsub.c b/binsub.c
new file mode 100644
index 0000000..c66148c
--- /dev/null
+++ b/binsub.c
@@ -0,0 +1,277 @@
+/**
+ * binsub.c / 2022-12-09
+ *
+ * (C) 2022 Zach van Rijn <me@zv.io>
+ *
+ * MIT License
+ *
+ * This utility truncates or replaces needles in an input file;
+ * truncation meaning the replacement string is empty.
+ *
+ * Replacement string length must be less than or equal to that
+ * of the needle because the file length must remain unchanged.
+ *
+ * For efficient operation, consider deploying this on a '.tar'
+ * file instead of individual files within a directory.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/**
+ * Basic memory structure: a heap buffer plus its byte count.
+ */
+struct buffer
+{
+ char *data; /* file contents; scanner() allocates len + 1 bytes */
+ size_t len; /* number of file bytes, excluding the extra nil byte */
+};
+
+
+/**
+ * Replace every occurrence of a needle inside the nil-terminated
+ * strings of a buffer, in place, keeping the buffer length fixed.
+ *
+ * Where the needle is found it is cut out, the replacement (if
+ * any) is injected, the rest of that string is shifted left, and
+ * the tail end of the string is padded with null bytes. We hope
+ * nobody is relying on precomputed string offsets. Mega kludge!
+ *
+ * buf   file contents; 'data' must hold 'len' + 1 bytes, the last
+ *       of them nil, as scanner() arranges.
+ * find  needle; an empty needle is rejected.
+ * repl  replacement, or NULL for none; if it is longer than the
+ *       needle the call is rejected and nothing is modified.
+ *
+ * Replacement = "":
+ *
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                       ^          shift data           ^
+ *                       +-------------------------------+
+ *
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * out |.|.|.|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|0|0|0|
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *           ^          shift data           ^
+ *           +-------------------------------+
+ *
+ * Replacement = "FOO" (same input as above):
+ *
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * out |.|.|.|F|O|O|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *           ^ FOO ^          shift data           ^
+ *           +=====+-------------------------------+
+ */
+void
+replace (struct buffer *buf, const char *find, const char *repl)
+{
+    char *match;    /* pointer to found needle */
+    size_t idx;     /* cursor into file buffer */
+    size_t off;     /* offset of the match in the file buffer */
+    size_t mlen;    /* length of matching string */
+
+    size_t nlen = strlen(find);             /* length of needle */
+    size_t rlen = repl ? strlen(repl) : 0;  /* length of replacement */
+
+    /**
+     * An empty needle matches everywhere and changes nothing; a
+     * replacement longer than the needle would make the padding
+     * length (nlen - rlen) wrap around and overrun the buffer.
+     */
+    if (nlen == 0 || rlen > nlen)
+    {
+        fprintf(stderr, "E: NEEDLE is empty or shorter than REPLACE\n");
+        return;
+    }
+
+    /**
+     * Iterate over each character in the current string in the
+     * buffer, because multiple full matches may be possible. If
+     * we cannot find a match in the current string, skip to the
+     * next string.
+     */
+    for (idx = 0; idx <= buf->len; idx++)
+    {
+        match = strstr(buf->data + idx, find);
+        if (!match)
+        {
+            /**
+             * Partial optimization: no needle exists in the rest
+             * of this string, so jump to its terminator; the loop
+             * increment then steps over it.
+             */
+            idx += strlen(buf->data + idx);
+            continue;
+        }
+
+        /**
+         * Report the length of the string from the match onward
+         * and where the match sits. A non-empty needle was found,
+         * so buf->len is non-zero and the division is safe.
+         */
+        mlen = strlen(match);
+        off = (size_t)(match - buf->data);
+        printf("%10zu bytes at offset 0x%010zx (%02zu%%)\n",
+            mlen, off, (100 * off) / buf->len
+        );
+
+        /**
+         * Inject the replacement (if any), move the remainder of
+         * the string up against it, and zero out the freed tail.
+         * memcpy() is skipped for rlen == 0 because 'repl' may be
+         * NULL in that case.
+         */
+        if (rlen)
+        {
+            memcpy(match, repl, rlen);
+        }
+        memmove(match + rlen, match + nlen, mlen - nlen);
+        memset(match + mlen - nlen + rlen, 0, nlen - rlen);
+    }
+}
+
+
+/**
+ * Read the contents of a file into a newly allocated buffer, run
+ * replace() over it with 'find' and 'repl' passed through as is,
+ * and write the buffer back over the same file. Errors go to
+ * stderr. It is possible to 'mmap()', but it leaves less room for
+ * checks.
+ */
+void
+scanner (const char *file, const char *find, const char *repl)
+{
+    FILE *fp = NULL;
+    size_t nb = 0;
+    long end = 0;
+
+    struct buffer buf;
+    memset(&buf, 0, sizeof(struct buffer));
+
+    fp = fopen(file, "rb+");
+    if (!fp)
+    {
+        fprintf(stderr, "E: Could not open FILE '%s' for reading!\n", file);
+        return;
+    }
+
+    /**
+     * ftell() reports failure as -1; stored in a size_t that would
+     * wrap around and make the 'len + 1' allocation below too small.
+     */
+    if (fseek(fp, 0, SEEK_END) != 0
+        || (end = ftell(fp)) < 0
+        || fseek(fp, 0, SEEK_SET) != 0)
+    {
+        fprintf(stderr, "E: Could not determine size of file '%s'\n", file);
+        goto done;
+    }
+    buf.len = (size_t)end;
+
+    /**
+     * Allocate memory for the entire file at once. This is not
+     * ideal, but we don't expect large files for our use case.
+     */
+    buf.data = malloc(buf.len + 1);
+    if (!buf.data)
+    {
+        fprintf(stderr,
+            "E: Could not allocate '%zu' bytes for file '%s'\n",
+            buf.len, file
+        );
+        goto done;
+    }
+    buf.data[buf.len] = 0; /* extra byte needs to be nil */
+
+    nb = fread(buf.data, 1, buf.len, fp);
+    if (nb != buf.len)
+    {
+        fprintf(stderr,
+            "E: Only read '%zu' / '%zu' bytes of file '%s'\n",
+            nb, buf.len, file
+        );
+        goto done;
+    }
+    fclose(fp);
+
+    printf("Examining file '%s'...\n", file);
+    replace(&buf, find, repl);
+
+    /* "wb" truncates on open, so a short write below loses data. */
+    fp = fopen(file, "wb");
+    if (!fp)
+    {
+        fprintf(stderr, "E: Could not open FILE '%s' for writing!\n", file);
+        goto done;
+    }
+
+    nb = fwrite(buf.data, 1, buf.len, fp);
+    if (nb != buf.len)
+    {
+        fprintf(stderr,
+            "E: Only wrote '%zu' / '%zu' bytes of file '%s'\n",
+            nb, buf.len, file
+        );
+    }
+
+done:
+    /* Single exit: release the stream (if open) and the buffer. */
+    if (fp)
+    {
+        fclose(fp);
+    }
+    free(buf.data);
+}
+
+
+/**
+ * WARNING!
+ *
+ * Every occurrence of NEEDLE within string sections of FILE is
+ * replaced with REPLACE (or cut out when REPLACE is not given).
+ * FILE is overwritten. Few, if any, sanity checks are in place.
+ *
+ * Usage: PROG FILE NEEDLE [REPLACE]
+ *
+ * Exits with 1 on a usage error or when REPLACE is longer than
+ * NEEDLE; otherwise hands off to scanner() and exits with 0.
+ */
+int
+main (int argc, char **argv)
+{
+    const int has_repl = (argc == 4);   /* optional REPLACE given? */
+    char *target;                       /* FILE argument */
+    char *needle;                       /* NEEDLE argument */
+    char *substitute = NULL;            /* REPLACE argument, if any */
+
+    /* Only two or three operands are accepted. */
+    if (argc != 3 && !has_repl)
+    {
+        fprintf(stderr, "Usage: %s FILE NEEDLE [REPLACE]\n", argv[0]);
+        return 1;
+    }
+
+    target = argv[1];
+    needle = argv[2];
+
+    if (has_repl)
+    {
+        substitute = argv[3];
+
+        /* The file length must not change, so REPLACE may not grow. */
+        if (strlen(substitute) > strlen(needle))
+        {
+            fputs("E: REPLACE cannot be longer than NEEDLE\n", stderr);
+            return 1;
+        }
+    }
+
+    /* scanner() reports its own errors; the exit status stays 0. */
+    scanner(target, needle, substitute);
+
+    return 0;
+}