/**
* binsub.c / 2022-12-09
*
* (C) 2022 Zach van Rijn <me@zv.io>
*
* MIT License
*
* This utility truncates or replaces needles in an input file;
* truncation meaning the replacement string is empty.
*
* Replacement string length must be less than or equal to that
* of the needle because the file length must remain unchanged.
*
* For efficient operation, consider deploying this on a '.tar'
* file instead of individual files within a directory.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/**
 * In-memory image of a file.
 *
 * 'data' holds 'len' file bytes followed by one extra NUL byte,
 * so every string in the image, including a final unterminated
 * one, can be handled with the standard string functions.
 */
struct buffer
{
    char   *data;   /* 'len' file bytes plus a trailing NUL */
    size_t  len;    /* number of file bytes (NUL excluded)  */
};

/**
 * Given a needle ('find') and optional replacement ('repl'), if
 * the needle is found, truncate it, inject the replacement, and
 * pad the tail end of the matching string with NUL bytes.
 *
 * The file length remains the same, and we hope that nobody is
 * relying on precomputed offsets into the strings. Mega kludge!
 *
 * In the diagrams below, '0' stands for a NUL byte.
 *
 * Replacement = "":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^          shift data           ^
 *            +-------------------------------+
 *
 * Replacement = "FOO":
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in  |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                        ^          shift data           ^
 *                        +-------------------------------+
 *
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  out |.|.|.|F|O|O|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|
 *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *            ^ FOO ^          shift data           ^
 *            +=====+-------------------------------+
 *
 * buf   Buffer to edit in place. 'buf->data[buf->len]' must be
 *       a NUL byte, because the search runs up to and including
 *       that position.
 * find  Needle to search for. An empty needle is ignored.
 * repl  Replacement, or NULL to simply remove the needle. It
 *       must not be longer than the needle; otherwise nothing
 *       is changed.
 *
 * One progress line is printed to stdout for every match.
 */
void
replace (struct buffer *buf, const char *find, const char *repl)
{
    char   *match;      /* pointer to found needle             */
    size_t  idx;        /* cursor into file buffer             */
    size_t  off;        /* offset of the match from data start */
    size_t  nlen;       /* length of needle                    */
    size_t  rlen;       /* length of replacement               */
    size_t  mlen;       /* length of matching string           */

    if (!buf || !buf->data || !find)
    {
        return;
    }

    nlen = strlen(find);
    rlen = repl ? strlen(repl) : 0;

    /**
     * An empty needle matches at every offset and changes
     * nothing. A replacement longer than the needle would make
     * 'nlen - rlen' wrap around and write out of bounds.
     */
    if (nlen == 0 || rlen > nlen)
    {
        return;
    }

    /**
     * Walk the buffer one position at a time, because a single
     * string may contain several matches. The sentinel NUL at
     * 'data[len]' is a valid (empty) search start, hence '<='.
     */
    for (idx = 0; idx <= buf->len; idx++)
    {
        match = strstr(buf->data + idx, find);
        if (!match)
        {
            /**
             * Nothing left in this string: jump to its NUL so
             * the loop increment lands on the next string.
             */
            idx += strlen(buf->data + idx);
            continue;
        }

        /**
         * Length from the match to the end of its string; this
         * is the region that gets rewritten.
         */
        mlen = strlen(match);
        off  = (size_t) (match - buf->data);

        /* A match implies at least 'nlen' bytes, so len > 0. */
        printf("%10zu bytes at offset 0x%010zx (%02zu%%)\n",
            mlen,
            off,
            (100 * off) / buf->len
        );

        /**
         * Inject the replacement (if any), pull the remainder
         * of the string forward so that it follows it, and zero
         * the bytes freed up at the tail.
         */
        if (rlen)
        {
            memcpy(match, repl, rlen);
        }
        memmove(match + rlen, match + nlen, mlen - nlen);
        memset(match + mlen - nlen + rlen, 0, nlen - rlen);
    }
}
/**
 * Read the contents of a file into a newly allocated buffer, run
 * 'replace()' over it, and write the buffer back to the same
 * path. It is possible to 'mmap()', but it leaves less room for
 * checks.
 *
 * file  Path of the file to rewrite. It is first opened in
 *       "rb+" mode, so it must be both readable and writable.
 * find  Needle, passed through to 'replace()'.
 * repl  Replacement or NULL, passed through to 'replace()'.
 *
 * Errors are reported on stderr; nothing is returned. The file
 * is reopened in "wb" mode for writing, which truncates it, so
 * a failure after that point can leave it incomplete.
 */
void
scanner (const char *file, const char *find, const char *repl)
{
    FILE *fp = NULL;
    long size = -1;
    size_t nb = 0;
    int rc = 0;
    struct buffer buf;

    memset(&buf, 0, sizeof(struct buffer));

    fp = fopen(file, "rb+");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for reading!\n",
            file
        );
        return;
    }

    /**
     * Determine the file size. 'ftell()' reports failure as a
     * negative value, which must not be stored in a 'size_t'.
     */
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        size = ftell(fp);
    }
    if (size < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        fprintf(stderr,
            "E: Could not determine size of file '%s'\n",
            file
        );
        goto done;
    }
    buf.len = (size_t) size;

    /**
     * Allocate memory for the entire file at once. This is not
     * ideal, but we don't expect large files for our use case.
     */
    buf.data = malloc(buf.len + 1);
    if (!buf.data)
    {
        fprintf(stderr,
            "E: Could not allocate '%zu' bytes for file '%s'\n",
            buf.len,
            file
        );
        goto done;
    }
    buf.data[buf.len] = 0; /* extra byte needs to be nil */

    nb = fread(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only read '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }
    fclose(fp);
    fp = NULL;

    printf("Examining file '%s'...\n", file);
    replace(&buf, find, repl);

    fp = fopen(file, "wb");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for writing!\n",
            file
        );
        goto done;
    }

    nb = fwrite(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only wrote '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }

    /**
     * Buffered data is flushed on close, so a failure here means
     * the file may not have been written out completely.
     */
    rc = fclose(fp);
    fp = NULL;
    if (rc != 0)
    {
        fprintf(stderr,
            "E: Could not finish writing file '%s'\n",
            file
        );
    }

done:
    /* Single exit: release whatever is still held. */
    if (fp)
    {
        fclose(fp);
    }
    free(buf.data);
}
/**
 * WARNING!
 *
 * This program replaces all occurrences of NEEDLE within string
 * sections of an input file with the string REPLACE. The input
 * file is overwritten. Few, if any, sanity checks are in place.
 *
 * Usage: PROG FILE NEEDLE [REPLACE]
 *
 * Returns 1 on a usage error (wrong argument count, empty
 * NEEDLE, or REPLACE longer than NEEDLE) and 0 otherwise. Errors
 * inside 'scanner()' are printed by it but do not affect the
 * exit status, because it does not return one.
 */
int
main (int argc, char **argv)
{
    const char *prog = "binsub";
    const char *file = NULL;
    const char *find = NULL;
    const char *repl = NULL;

    /* 'argv[0]' may be absent when 'argc' is zero. */
    if (argc > 0 && argv[0])
    {
        prog = argv[0];
    }

    if (argc != 3 && argc != 4)
    {
        fprintf(stderr,
            "Usage: %s FILE NEEDLE [REPLACE]\n",
            prog
        );
        return 1;
    }

    file = argv[1];
    find = argv[2];

    /**
     * An empty needle matches at every offset of the file, so
     * there is nothing meaningful to replace.
     */
    if (find[0] == '\0')
    {
        fprintf(stderr,
            "E: NEEDLE cannot be empty\n"
        );
        return 1;
    }

    if (argc == 4)
    {
        repl = argv[3];

        /* The file length must not change, so REPLACE must fit. */
        if (strlen(repl) > strlen(find))
        {
            fprintf(stderr,
                "E: REPLACE cannot be longer than NEEDLE\n"
            );
            return 1;
        }
    }

    scanner(file, find, repl);

    return 0;
}