State machine for removing ANSI escape sequences

From Thought dump
Revision as of 15:58, 11 January 2025 by Jwo (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Written quite quickly, but it works. The code could still use a little cleanup.

bool strip_csi     = true; // Strip CSI terminal control sequences (colors etc.).
bool strip_escapes = true; // Master switch: set to false to pass every byte through untouched.

// Byte-at-a-time filter that removes ANSI/ECMA-48 escape sequences from a
// stream and writes everything else to stdout.
//
// Call once per input byte, in order; the parser state is kept in function-local
// statics between calls.
//
// What is removed (while strip_escapes is true):
//   * Control strings -- DCS, SOS, OSC, PM and APC -- in both their 7-bit
//     (ESC P / X / ] / ^ / _) and 8-bit (0x90 / 0x98 / 0x9d / 0x9e / 0x9f)
//     forms, up to and including the terminating ST (ESC \ or 0x9c).  An OSC
//     string may also be terminated by BEL, as xterm-compatible programs
//     commonly do.
//   * CSI sequences (ESC [ or 0x9b ... final byte), only if strip_csi is true.
//
// Any other ESC-prefixed pair (e.g. ESC 7) is passed through unchanged.
// stdout is flushed after every newline that is written.
//
// NOTE(review): the 8-bit introducers are matched on the raw byte value, so
// UTF-8 continuation bytes that happen to equal 0x90/0x98/0x9b-0x9f would be
// treated as controls.  Confirm the expected input encoding with the caller.
void feedthrough_char(std::uint8_t ch)
{
    constexpr std::uint8_t kEsc = 0x1b; // ESC ('\e' is a non-standard escape).
    constexpr std::uint8_t kBel = 0x07; // BEL, alternative OSC terminator.
    constexpr std::uint8_t kSt  = 0x9c; // 8-bit String Terminator.

    enum State {
        ST_BASE,    // Base state.
        ST_ESC,     // ESC received and withheld until the next byte decides.
        ST_STR,     // Inside an (ignored) control string.
        ST_STR_ESC, // Ditto, and the previous byte was ESC.
        ST_CSEQ,    // Inside a CSI sequence that is being skipped.
    };
    static State state = ST_BASE;
    // True while the control string being skipped is an OSC, which is the
    // only string type that BEL is allowed to terminate.
    static bool in_osc = false;

    // Write one byte to stdout, flushing at end of line.
    const auto emit = [](std::uint8_t byte) {
        fputc(byte, stdout);
        if (byte == '\n') {
            fflush(stdout);
        }
    };

    if (!strip_escapes) {
        emit(ch);
        return;
    }

    switch (state) {
    case ST_BASE:
        switch (ch) {
        case kEsc:
            state = ST_ESC;
            return;
        case 0x9b: // CSI.
            if (strip_csi) {
                state = ST_CSEQ;
                return;
            }
            break;
        case 0x9d: // OSC
            in_osc = true;
            state = ST_STR;
            return;
        case 0x90: // DCS
        case 0x98: // SOS
        case 0x9e: // PM
        case 0x9f: // APC
            in_osc = false;
            state = ST_STR;
            return;
        default:
            break;
        }
        emit(ch);
        return;

    case ST_ESC:
        switch (ch) {
        case '[': // CSI.
            if (strip_csi) {
                state = ST_CSEQ;
                return;
            }
            break;
        case ']': // OSC
            in_osc = true;
            state = ST_STR;
            return;
        case 'P': // DCS
        case 'X': // SOS
        case '^': // PM
        case '_': // APC
            in_osc = false;
            state = ST_STR;
            return;
        case kEsc:
            // Release the withheld ESC but keep waiting: the new ESC may
            // itself introduce a sequence that has to be stripped.
            emit(kEsc);
            return;
        default:
            break;
        }
        // Not a sequence we strip: release the withheld ESC and this byte.
        state = ST_BASE;
        emit(kEsc);
        emit(ch);
        return;

    case ST_STR:
        if (ch == kSt || (in_osc && ch == kBel)) {
            state = ST_BASE;
        } else if (ch == kEsc) {
            state = ST_STR_ESC;
        }
        return;

    case ST_STR_ESC:
        if (ch == '\\' || ch == kSt || (in_osc && ch == kBel)) {
            state = ST_BASE; // ESC \ (7-bit ST) or another valid terminator.
        } else if (ch != kEsc) {
            state = ST_STR;  // The ESC was just string content.
        }
        // On a repeated ESC stay here so that "ESC ESC \" still terminates.
        return;

    case ST_CSEQ:
        // A CSI sequence ends with a final byte in the range 0x40..0x7E
        // ('@' through '~'); parameter and intermediate bytes are skipped.
        if (ch >= 0x40 && ch <= 0x7e) {
            state = ST_BASE;
        }
        return;
    }
}