/* cgit view: summary | refs | log | blame | commit | diff */
/* path: root/binsub.c */
/* blob: c66148cba7f72a0991cddbe554a4eedc75a9ae49 (plain) (tree) */




















































































































































































































































































                                                                
/**
 * binsub.c / 2022-12-09
 *
 * (C) 2022 Zach van Rijn <me@zv.io>
 *
 * MIT License
 *
 * This utility truncates or replaces needles in an input file;
 * truncation meaning the replacement string is empty.
 *
 * Replacement string length must be less than or equal to that
 * of the needle because the file length must remain unchanged.
 *
 * For efficient operation, consider deploying this on a '.tar'
 * file instead of individual files within a directory.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/**
 * Basic memory structure: a block of bytes plus its length. It
 * is used to hold the complete contents of the input file while
 * needles are being replaced.
 */
struct buffer
{
    /* File contents. scanner() allocates 'len' + 1 bytes and sets
     * the extra byte to zero so the block is always terminated. */
    char *data;
    /* Number of file bytes in 'data' (excludes the extra byte). */
    size_t len;
};


/**
 * Given a needle ('find') and optional replacement ('repl'), if
 * the needle is found, truncate it, inject the replacement, and
 * pad the tail end of the matching string with null bytes.
 *
 * The file length remains the same, and we hope that nobody is
 * relying on precomputed offsets into the strings. Mega kludge!
 *
 * Replacement = "":
 *
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                       ^           shift data          ^
 *                       +-------------------------------+
 *
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * out |.|.|.|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|0|0|0|
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *           ^           shift data          ^
 *           +-------------------------------+
 *
 * Replacement = "FOO":
 *
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  in |.|.|.|N|E|E|D|L|E|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                       ^           shift data          ^
 *                       +-------------------------------+
 *
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * out |.|.|.|F|O|O|.|.|.|O|T|H|E|R| |D|A|T|A|.|.|.|0|0|0|0|
 *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *           ^ FOO ^           shift data          ^
 *           +=====+-------------------------------+
 *
 * Parameters:
 *
 *   buf   Data to edit in place. 'buf->data' must hold at least
 *         'buf->len' + 1 bytes and 'buf->data[buf->len]' must be
 *         zero, because the string functions used here rely on
 *         that terminator to stay inside the allocation.
 *   find  Needle. An empty needle is ignored.
 *   repl  Replacement, or NULL to truncate. If it is longer than
 *         the needle nothing is modified, since the data cannot
 *         grow.
 *
 * One line is printed to stdout for every match rewritten.
 */
void
replace (struct buffer *buf, const char *find, const char *repl)
{
    char   *match;              /* pointer to found needle    */
    size_t  idx;                /* cursor into file buffer    */
    size_t  off;                /* offset of match in buffer  */

    size_t  nlen;               /* length of needle           */
    size_t  rlen;               /* length of replacement      */

    size_t  mlen;               /* length of matching string  */

    if (!buf || !buf->data || !find)
    {
        return;
    }

    nlen = strlen(find);
    rlen = repl ? strlen(repl) : 0;

    /**
     * An empty needle would "match" at every single byte, and a
     * replacement longer than the needle would make the length
     * of the zeroed tail ('nlen - rlen') wrap around. Neither is
     * a meaningful request, so leave the buffer untouched.
     */
    if (nlen == 0 || rlen > nlen)
    {
        return;
    }

    /**
     * Walk the buffer one string at a time. Within a string,
     * keep searching until no more needles are found, and then
     * skip to the byte after its terminator.
     */
    idx = 0;
    while (idx <= buf->len)
    {
        /**
         * Does the current string contain the needle?
         */
        match = strstr(buf->data + idx, find);
        if (!match)
        {
            /**
             * No (more) needles in this string; continue with
             * whatever follows its terminating null byte.
             */
            idx += strlen(buf->data + idx) + 1;
            continue;
        }

        /**
         * Length from the needle to the end of its string; this
         * is the region that gets rewritten. A match implies
         * 'buf->len' >= 'nlen' > 0, so the division is safe.
         */
        mlen = strlen(match);
        off  = (size_t)(match - buf->data);
        printf("%10zu bytes at offset 0x%010zx (%02zu%%)\n",
            mlen,
            off,
            (100 * off) / buf->len
        );

        /**
         * The replacement string is l.e. the length of the
         * needle, so if it is non-empty, inject it first.
         */
        if (rlen)
        {
            memcpy(match, repl, rlen);
        }

        /**
         * Copy the non-needle remainder of the string so that
         * it directly follows the injected replacement (or the
         * match location itself if there is none), then zero
         * out the bytes that were freed up at the tail.
         */
        memmove(match + rlen, match + nlen, mlen - nlen);
        memset(match + mlen - nlen + rlen, 0, nlen - rlen);

        /**
         * Resume right after the injected replacement so that
         * text we just wrote is never matched a second time.
         */
        idx = off + rlen;
    }
}


/**
 * Read the contents of a file into a newly allocated buffer,
 * run replace() on it, and write the buffer back to the same
 * file. It is possible to 'mmap()', but it leaves less room for
 * checks.
 *
 * Parameters:
 *
 *   file  Path of the file to rewrite in place.
 *   find  Needle, passed through to replace().
 *   repl  Replacement or NULL, passed through to replace().
 *
 * Errors are reported on stderr. If the file cannot be opened,
 * measured, or read completely, it is left untouched.
 */
void
scanner (const char *file, const char *find, const char *repl)
{
    FILE   *fp = NULL;
    size_t  nb = 0;
    long    end = 0;            /* file size as told by ftell */

    struct buffer buf;
    memset(&buf, 0, sizeof(struct buffer));

    /**
     * Opening in update mode makes this fail early for a file
     * that we would not be allowed to write back later.
     */
    fp = fopen(file, "rb+");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for reading!\n",
            file
        );
        return;
    }

    /**
     * 'ftell()' reports failure as -1, which must not end up in
     * an unsigned length: it would wrap the allocation size to
     * zero and put the terminator write far out of bounds.
     */
    if (fseek(fp, 0, SEEK_END) != 0)
    {
        end = -1;
    }
    else
    {
        end = ftell(fp);
    }
    if (end < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        fprintf(stderr,
            "E: Could not determine size of file '%s'\n",
            file
        );
        goto done;
    }
    buf.len = (size_t)end;

    /**
     * Allocate memory for the entire file at once. This is not
     * ideal, but we don't expect large files for our use case.
     */
    buf.data = malloc(buf.len + 1);
    if (!buf.data)
    {
        fprintf(stderr,
            "E: Could not allocate '%zu' bytes for file '%s'\n",
            buf.len,
            file
        );
        goto done;
    }
    buf.data[buf.len] = 0;      /* extra byte needs to be nil */

    nb = fread(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only read '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }
    fclose(fp);
    fp = NULL;

    printf("Examining file '%s'...\n", file);
    replace(&buf, find, repl);

    fp = fopen(file, "wb");
    if (!fp)
    {
        fprintf(stderr,
            "E: Could not open FILE '%s' for writing!\n",
            file
        );
        goto done;
    }

    nb = fwrite(buf.data, 1, buf.len, fp);
    if (nb != buf.len)
    {
        fprintf(stderr,
            "E: Only wrote '%zu' / '%zu' bytes of file '%s'\n",
            nb,
            buf.len,
            file
        );
        goto done;
    }

    /**
     * Buffered data may only hit the disk on close, so a failed
     * close means the file was not written completely.
     */
    nb = (fclose(fp) == 0);
    fp = NULL;
    if (!nb)
    {
        fprintf(stderr,
            "E: Could not finish writing file '%s'\n",
            file
        );
    }

done:
    /* Single exit path: release whatever is still held. */
    if (fp)
    {
        fclose(fp);
    }
    free(buf.data);
}


/**
 * WARNING!
 *
 * This program replaces all occurrences of NEEDLE within string
 * sections of an input file with the string REPLACE. The input
 * file is overwritten. Few, if any, sanity checks are in place.
 *
 * Usage: binsub FILE NEEDLE [REPLACE]
 *
 * Omitting REPLACE truncates every NEEDLE. Returns 1 when the
 * arguments are unusable (wrong count, empty NEEDLE, or REPLACE
 * longer than NEEDLE) and 0 otherwise.
 */
int
main (int argc, char **argv)
{
    const char *prog = "binsub";
    char *file = NULL;
    char *find = NULL;
    char *repl = NULL;

    /* 'argv[0]' is allowed to be absent; keep the fallback name. */
    if (argc > 0 && argv[0])
    {
        prog = argv[0];
    }

    if (argc != 3 && argc != 4)
    {
        fprintf(stderr,
            "Usage: %s FILE NEEDLE [REPLACE]\n",
            prog
        );
        return 1;
    }

    file = argv[1];
    find = argv[2];
    if (argc == 4)
    {
        repl = argv[3];
    }

    /**
     * An empty needle is found at every position of every
     * string, which is never what the user wants.
     */
    if (find[0] == '\0')
    {
        fprintf(stderr,
            "E: NEEDLE cannot be empty\n"
        );
        return 1;
    }

    /* The file length must remain unchanged. */
    if (repl && strlen(repl) > strlen(find))
    {
        fprintf(stderr,
            "E: REPLACE cannot be longer than NEEDLE\n"
        );
        return 1;
    }

    scanner(file, find, repl);

    return 0;
}