firmware/src/mesh/compression/unishox2.cpp

/*
 * Copyright (C) 2020 Siara Logics (cc)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author Arundale Ramanathan
 *
 * Port for Particle (particle.io) / Arduino - Jonathan Greenblatt
 */
/**
 * @file unishox2.c
 * @author Arundale Ramanathan, James Z. M. Gao
 * @brief Main code of Unishox2 Compression and Decompression library
 *
 * This file implements the code for the Unishox API function \n
 * defined in unishox2.h
 */

#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "unishox2.h"

/// uint8_t is unsigned char
/// NOTE(review): <stdint.h> is included above and normally already provides uint8_t, so this typedef repeats that
/// definition - confirm whether any supported toolchain still needs it.
typedef unsigned char uint8_t;

/// Frequent-sequence tables, six strings each. unishox2_compress_lines() compares entry i of the table it receives as
/// usx_freq_seq with the input and, on a match, emits usx_freq_codes[i] instead of the literal characters.
/// NOTE(review): the suffixes (DFLT, TXT, URL, JSON, HTML, XML) presumably name the preset each table is meant for -
/// confirm against unishox2.h.
const char *USX_FREQ_SEQ_DFLT[] = {"\": \"", "\": ", "</", "=\"", "\":\"", "://"};
const char *USX_FREQ_SEQ_TXT[] = {" the ", " and ", "tion", " with", "ing", "ment"};
const char *USX_FREQ_SEQ_URL[] = {"https://", "www.", ".com", "http://", ".org", ".net"};
const char *USX_FREQ_SEQ_JSON[] = {"\": \"", "\": ", "\",", "}}}", "\":\"", "}}"};
const char *USX_FREQ_SEQ_HTML[] = {"</", "=\"", "div", "href", "class", "<p>"};
const char *USX_FREQ_SEQ_XML[] = {"</", "=\"", "\">", "<?xml version=\"1.0\"", "xmlns:", "://"};
/// Template strings, terminated by a null entry. When a table like this is passed as usx_templates, the compressor
/// reads the template characters as follows: 'f' / 'F' accept a decimal digit or a lower / upper case hex digit,
/// 'r' accepts '0'-'7', 't' accepts '0'-'3', 'o' accepts '0'-'1', and every other character must match literally.
const char *USX_TEMPLATES[] = {"tfff-of-tfTtf:rf:rf.fffZ", "tfff-of-tf", "(fff) fff-ffff", "tf:rf:rf", 0};

/// possible horizontal sets and states
/// USX_ALPHA, USX_SYM and USX_NUM double as row indexes into usx_sets. USX_DICT selects the horizontal code that
/// introduces a repeated sequence (see matchOccurance() / matchLine()). USX_DELTA is a state with its own switch
/// code (see append_switch_code()). USX_NUM_TEMP is not referenced in the part of the file reviewed here.
enum { USX_ALPHA = 0, USX_SYM, USX_NUM, USX_DICT, USX_DELTA, USX_NUM_TEMP };

/// This 2D array has the characters for the sets USX_ALPHA, USX_SYM and USX_NUM (one row per set, in that order).
/// The column of a character is its vertical position, i.e. the index used with usx_vcodes / usx_vcode_lens.
/// Where a character cannot fit into a uint8_t, 0 is used and handled in code.
uint8_t usx_sets[][28] = {{0,   ' ', 'e', 't', 'a', 'o', 'i', 'n', 's', 'r', 'l', 'c', 'd', 'h',
                           'u', 'p', 'm', 'b', 'g', 'w', 'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z'},
                          {'"',  '{', '}', '_', '<', '>', ':', '\n', 0,    '[', ']', '\\', ';', '\'',
                           '\t', '@', '*', '&', '?', '!', '^', '|',  '\r', '~', '`', 0,    0,   0},
                          {0,   ',', '.', '0', '1', '9', '2', '5', '-', '/', '3', '4', '6', '7',
                           '8', '(', ')', ' ', '=', '+', '$', '%', '#', 0,   0,   0,   0,   0}};

/// Stores position of letter in usx_sets, indexed by (character - USX_OFFSET_94). Filled in by init_coder().
/// First 3 bits - position in usx_hcodes (the set, i.e. the row of usx_sets)
/// Next  5 bits - position in usx_vcodes (the column of usx_sets)
uint8_t usx_code_94[94];

/// Vertical codes starting from the MSB; entry i is the bit pattern for vertical position i
uint8_t usx_vcodes[] = {0x00, 0x40, 0x60, 0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xD8, 0xE0, 0xE4, 0xE8, 0xEC,
                        0xEE, 0xF0, 0xF2, 0xF4, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF};

/// Length in bits of each vertical code in usx_vcodes
uint8_t usx_vcode_lens[] = {2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};

/// Vertical Codes and Set number for frequent sequences in sets USX_SYM and USX_NUM. First 3 bits indicate set (USX_SYM/USX_NUM)
/// and rest are vcode positions. The positions used here (USX_SYM 25-27, USX_NUM 23-25) hold 0 in usx_sets.
uint8_t usx_freq_codes[] = {(1 << 5) + 25, (1 << 5) + 26, (1 << 5) + 27, (2 << 5) + 23, (2 << 5) + 24, (2 << 5) + 25};

/// Not used. The values are the lead-byte masks readUTF8() applies for 2, 3 and 4 byte sequences.
const int UTF8_MASK[] = {0xE0, 0xF0, 0xF8};
/// Not used. The values are the lead-byte prefixes readUTF8() expects for 2, 3 and 4 byte sequences.
const int UTF8_PREFIX[] = {0xC0, 0xE0, 0xF0};

/// Minimum length to consider as repeating sequence
#define NICE_LEN 5

/// The *_CODE values below pack a set number in the top 3 bits and a vertical position in the low 5 bits,
/// the same layout as the entries of usx_code_94.

/// Set USX_NUM (2) and vertical code (26) for encoding repeating letters
#define RPT_CODE ((2 << 5) + 26)
/// Set USX_NUM (2) and vertical code (27) for encoding terminator
#define TERM_CODE ((2 << 5) + 27)
/// Set USX_SYM (1) and vertical code (7) for encoding Line feed \\n
#define LF_CODE ((1 << 5) + 7)
/// Set USX_SYM (1) and vertical code (8) for encoding \\r\\n
#define CRLF_CODE ((1 << 5) + 8)
/// Set USX_SYM (1) and vertical code (22) for encoding \\r
#define CR_CODE ((1 << 5) + 22)
/// Set USX_SYM (1) and vertical code (14) for encoding \\t
#define TAB_CODE ((1 << 5) + 14)
/// Set USX_NUM (2) and vertical code (17) for space character when it appears in USX_NUM state
#define NUM_SPC_CODE ((2 << 5) + 17)

/// Code for special code (11111) when state=USX_DELTA
#define UNI_STATE_SPL_CODE 0xF8
/// Length of Code for special code when state=USX_DELTA
#define UNI_STATE_SPL_CODE_LEN 5
/// Code for switch code (10) when state=USX_DELTA; it is sent after UNI_STATE_SPL_CODE
#define UNI_STATE_SW_CODE 0x80
/// Length of Code for Switch code when state=USX_DELTA
#define UNI_STATE_SW_CODE_LEN 2

/// Switch code (00) used in every state other than USX_DELTA
#define SW_CODE 0
/// Length of Switch code
#define SW_CODE_LEN 2
/// Terminator bit sequence for Preset 1. Length varies depending on state as per following macros
#define TERM_BYTE_PRESET_1 0
/// Length of Terminator bit sequence when state is lower
#define TERM_BYTE_PRESET_1_LEN_LOWER 6
/// Length of Terminator bit sequence when state is upper
#define TERM_BYTE_PRESET_1_LEN_UPPER 4

/// Offset at which usx_code_94 starts: usx_code_94[0] describes character 33 ('!')
#define USX_OFFSET_94 33

/// Set to 1 once init_coder() has filled usx_code_94, so later calls return immediately
uint8_t is_inited = 0;

/// Builds the usx_code_94 lookup table from usx_sets. Does nothing after the first call. \n
/// Every character above 32 found in usx_sets gets an entry whose 3 msb bits are the set \n
/// (USX_ALPHA / USX_SYM / USX_NUM) and whose low 5 bits are the vertical position in that set. \n
/// Lower case letters also write the same entry into the slot of their upper case counterpart.
void init_coder()
{
    if (!is_inited) {
        memset(usx_code_94, '\0', sizeof(usx_code_94));
        for (int set = 0; set < 3; set++) {
            for (int pos = 0; pos < 28; pos++) {
                const uint8_t ch = usx_sets[set][pos];
                // 0 entries are placeholders, and space has no slot in the 94 entry table
                if (ch <= 32)
                    continue;
                const uint8_t packed = (set << 5) + pos;
                usx_code_94[ch - USX_OFFSET_94] = packed;
                if (ch >= 'a' && ch <= 'z')
                    usx_code_94[ch - USX_OFFSET_94 - ('a' - 'A')] = packed;
            }
        }
        is_inited = 1;
    }
}

/// Mask for retrieving each code to be encoded according to its length: entry (n - 1) keeps the top n bits
unsigned int usx_mask[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};

/// Appends the top clen bits of code to out, MSB first, starting at bit position ol. \n
/// out holds olen bytes; if a bit would land outside of it, -1 is returned. \n
/// Otherwise the new bit position (ol + clen) is returned. \n
/// A byte is assigned when writing starts at its first bit and OR-ed into otherwise, so the \n
/// bits after position ol in a partially written byte must still be zero.
int append_bits(char *out, int olen, int ol, uint8_t code, int clen)
{
    for (; clen > 0;) {
        const uint8_t bit_off = ol % 8;
        uint8_t take = clen;
        unsigned char piece = code & usx_mask[take - 1];

        // Line the bits up with the first free bit of the current byte
        piece >>= bit_off;
        // Only what fits in the current byte is consumed on this pass
        if (take + bit_off > 8)
            take = 8 - bit_off;

        const int byte_idx = ol / 8;
        if (byte_idx < 0 || byte_idx >= olen)
            return -1;

        if (bit_off)
            out[byte_idx] |= piece;
        else
            out[byte_idx] = piece;

        code <<= take;
        ol += take;
        clen -= take;
    }
    return ol;
}

/// This is a safe call to append_bits() making sure it does not write past olen
/// exp is evaluated once; if its value is negative the enclosing function returns that value immediately.
/// It can therefore only be used inside functions that return int.
#define SAFE_APPEND_BITS(exp)                                                                                                    \
    do {                                                                                                                         \
        const int newidx = (exp);                                                                                                \
        if (newidx < 0)                                                                                                          \
            return newidx;                                                                                                       \
    } while (0)

/// Appends the switch code for the given state to out and returns the new bit position. \n
/// In USX_DELTA state this is the special code followed by the delta switch code, \n
/// in every other state it is the plain SW_CODE. \n
/// A negative value is returned as soon as out is full.
int append_switch_code(char *out, int olen, int ol, uint8_t state)
{
    if (state != USX_DELTA)
        return append_bits(out, olen, ol, SW_CODE, SW_CODE_LEN);

    ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN);
    if (ol < 0)
        return ol;
    return append_bits(out, olen, ol, UNI_STATE_SW_CODE, UNI_STATE_SW_CODE_LEN);
}

/// Appends given horizontal and vertical code bits to out. \n
/// code packs the set in its top 3 bits and the vertical position in its low 5 bits. \n
/// Nothing is written when the set is not USX_ALPHA and its horizontal code length is 0. \n
/// *state is updated when the write moves the coder to USX_ALPHA, or to USX_NUM for a digit. \n
/// Returns the new bit position, or a negative value when out is full.
int append_code(char *out, int olen, int ol, uint8_t code, uint8_t *state, const uint8_t usx_hcodes[],
                const uint8_t usx_hcode_lens[])
{
    const uint8_t set = code >> 5;
    const uint8_t pos = code & 0x1F;

    if (set != USX_ALPHA && usx_hcode_lens[set] == 0)
        return ol;

    if (set == USX_ALPHA) {
        // Already in alpha state: the vertical code alone is enough
        if (*state != USX_ALPHA) {
            SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
            SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
            *state = USX_ALPHA;
        }
    } else if (set == USX_SYM) {
        // Symbols never become the current state, so the switch is sent every time
        SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_SYM], usx_hcode_lens[USX_SYM]));
    } else if (set == USX_NUM) {
        if (*state != USX_NUM) {
            SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state));
            SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]));
            // Only a digit makes the numeric state stick
            const uint8_t ch = usx_sets[set][pos];
            if (ch >= '0' && ch <= '9')
                *state = USX_NUM;
        }
    }

    return append_bits(out, olen, ol, usx_vcodes[pos], usx_vcode_lens[pos]);
}

/// Length of bits used to represent count for each level
const uint8_t count_bit_lens[5] = {2, 4, 7, 11, 16};
/// Cumulative counts represented at each level
const int32_t count_adder[5] = {4, 20, 148, 2196, 67732};
/// Codes used to specify the level that the count belongs to
const uint8_t count_codes[] = {0x01, 0x82, 0xC3, 0xE4, 0xF4};
/// Encodes given count to out and returns the new bit position. \n
/// The count is written as a level prefix from count_codes followed by the offset of the count \n
/// within that level, using count_bit_lens bits. \n
/// A count of 67732 or more fits no level: nothing is written and ol is returned unchanged. \n
/// A negative value is returned when out is full.
int encodeCount(char *out, int olen, int ol, int count)
{
    // Pick the first level whose cumulative limit is above count
    int level = 0;
    while (level < 5 && count >= count_adder[level])
        level++;
    if (level == 5)
        return ol;

    const uint8_t nbits = count_bit_lens[level];
    const int32_t base = level ? count_adder[level - 1] : 0;

    // First five bits are code and Last three bits of codes represent length
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (count_codes[level] & 0xF8), count_codes[level] & 0x07));

    // Left-align the offset in 16 bits so it can be sent MSB first, one byte at a time
    const uint16_t aligned = (count - base) << (16 - nbits);
    if (nbits > 8) {
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, aligned >> 8, 8));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, aligned & 0xFF, nbits - 8));
    } else {
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, aligned >> 8, nbits));
    }
    return ol;
}

/// Length of bits used to represent delta code for each level
const uint8_t uni_bit_len[5] = {6, 12, 14, 16, 21};
/// Cumulative delta codes represented at each level
const int32_t uni_adder[5] = {0, 64, 4160, 20544, 86080};

/// Encodes the unicode code point given by code to out as a delta against prev_code. \n
/// Written are: a level prefix, one sign bit (1 when prev_code is greater than code) and the \n
/// absolute difference minus uni_adder of the level, in uni_bit_len bits. \n
/// Returns the new bit position, a negative value when out is full, or ol unchanged when the \n
/// difference fits none of the five levels.
int encodeUnicode(char *out, int olen, int ol, int32_t code, int32_t prev_code)
{
    // First five bits are code and Last three bits of codes represent length
    const uint8_t codes[6] = {0x01, 0x82, 0xC3, 0xE4, 0xF5, 0xFD};

    const int32_t delta = code - prev_code;
    const int32_t magnitude = delta < 0 ? -delta : delta;

    // Find the first level whose cumulative range covers the magnitude
    int32_t upper = 0;
    int level;
    for (level = 0; level < 5; level++) {
        upper += (1 << uni_bit_len[level]);
        if (magnitude < upper)
            break;
    }
    if (level == 5)
        return ol;

    const uint8_t nbits = uni_bit_len[level];
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (codes[level] & 0xF8), codes[level] & 0x07));
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, prev_code > code ? 0x80 : 0, 1));

    // The value is left-aligned to a whole number of bytes and sent MSB first
    int32_t val = magnitude - uni_adder[level];
    if (nbits > 16) {
        val <<= (24 - nbits);
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 16, 8));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (val >> 8) & 0xFF, 8));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, nbits - 16));
    } else if (nbits > 8) {
        val <<= (16 - nbits);
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 8, 8));
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, nbits - 8));
    } else {
        val <<= (8 - nbits);
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, nbits));
    }
    return ol;
}

/// Decodes the 2, 3 or 4 byte UTF-8 sequence that starts at in[l], where in holds len bytes. \n
/// On a structurally valid sequence *utf8len receives its length in bytes and the code point \n
/// is returned, except that an overlong encoding returns 0 (with *utf8len still set). \n
/// In every other case (single byte, truncated or malformed input) 0 is returned and \n
/// *utf8len is left untouched.
int32_t readUTF8(const char *in, int len, int l, int *utf8len)
{
    const int avail = len - l;

    if (avail > 1 && (in[l] & 0xE0) == 0xC0 && (in[l + 1] & 0xC0) == 0x80) {
        const int32_t cp = ((int32_t)(in[l] & 0x1F) << 6) | (in[l + 1] & 0x3F);
        *utf8len = 2;
        return cp < 0x80 ? 0 : cp;
    }

    if (avail > 2 && (in[l] & 0xF0) == 0xE0 && (in[l + 1] & 0xC0) == 0x80 && (in[l + 2] & 0xC0) == 0x80) {
        const int32_t cp = ((int32_t)(in[l] & 0x0F) << 12) | ((int32_t)(in[l + 1] & 0x3F) << 6) | (in[l + 2] & 0x3F);
        *utf8len = 3;
        return cp < 0x0800 ? 0 : cp;
    }

    if (avail > 3 && (in[l] & 0xF8) == 0xF0 && (in[l + 1] & 0xC0) == 0x80 && (in[l + 2] & 0xC0) == 0x80 &&
        (in[l + 3] & 0xC0) == 0x80) {
        const int32_t cp = ((int32_t)(in[l] & 0x07) << 18) | ((int32_t)(in[l + 1] & 0x3F) << 12) |
                           ((int32_t)(in[l + 2] & 0x3F) << 6) | (in[l + 3] & 0x3F);
        *utf8len = 4;
        return cp < 0x10000 ? 0 : cp;
    }

    return 0;
}

/// Finds the longest earlier occurrence, within in[0..l), of the text that starts at in[l]. \n
/// If a match is found and it is longer than NICE_LEN, it is encoded as a repeating sequence to out \n
/// (switch code, USX_DICT horizontal code, length minus NICE_LEN, distance). \n
/// This is also used for Unicode strings: a match is shortened so that it never stops in the \n
/// middle of a UTF-8 character. \n
/// This is a crude implementation that is not optimized.  Assuming only short strings \n
/// are encoded, this is not much of an issue. \n
/// Returns the index of the last input character covered by the match when one was encoded, \n
/// -l when there is no match, and a negative value with *ol negative when out is full. \n
/// Callers have to look at *ol to tell the last two cases apart.
int matchOccurance(const char *in, int len, int l, char *out, int olen, int *ol, const uint8_t *state, const uint8_t usx_hcodes[],
                   const uint8_t usx_hcode_lens[])
{
    int j, k;
    int longest_dist = 0;
    int longest_len = 0;
    for (j = l - NICE_LEN; j >= 0; j--) {
        for (k = l; k < len && j + k - l < l; k++) {
            if (in[k] != in[j + k - l])
                break;
        }
        // Skip partial UTF-8 matches: back off while the first unmatched byte is a continuation byte.
        // k == len means the match ran to the end of the input, so there is no unmatched byte to
        // inspect (in[len] is outside the buffer). k is also kept from dropping below l so that a
        // run of continuation bytes cannot walk the index in front of the match or the buffer.
        while (k > l && k < len && (((unsigned char)in[k]) >> 6) == 2)
            k--;
        if ((k - l) > (NICE_LEN - 1)) {
            int match_len = k - l - NICE_LEN;
            int match_dist = l - j - NICE_LEN + 1;
            if (match_len > longest_len) {
                longest_len = match_len;
                longest_dist = match_dist;
            }
        }
    }
    if (longest_len) {
        SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state));
        SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT]));
        SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_len));
        SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_dist));
        // The caller's loop increments l again, hence the index of the last matched character
        l += (longest_len + NICE_LEN);
        l--;
        return l;
    }
    return -l;
}

/// This is used only when encoding a string array \n
/// Walks the linked list starting at prev_lines and looks for text that matches the input starting at in[l]. \n
/// If a match of at least NICE_LEN bytes is found, it is encoded as a repeating sequence to out \n
/// (switch code, USX_DICT horizontal code, length minus NICE_LEN, offset in the line, line counter). \n
/// When a later candidate replaces an earlier one, the output position is rewound to where it was on entry \n
/// and the candidate is encoded again from there. \n
/// This is also used for Unicode strings: a match is shortened so that it never stops in the \n
/// middle of a UTF-8 character. \n
/// This is a crude implementation that is not optimized.  Assuming only short strings \n
/// are encoded, this is not much of an issue. \n
/// Returns the index of the last input character covered by the match when one was encoded, \n
/// -l when there is no match, and a negative value with *ol negative when out is full.
int matchLine(const char *in, int len, int l, char *out, int olen, int *ol, struct us_lnk_lst *prev_lines, const uint8_t *state,
              const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[])
{
    int last_ol = *ol;
    int last_len = 0;
    int last_dist = 0;
    int last_ctx = 0;
    int line_ctr = 0;
    // NOTE(review): j is not reset when moving on to the next list entry, so later lines are only
    // searched from the offset where the previous line stopped - confirm this is intended.
    int j = 0;
    do {
        int i, k;
        int line_len = (int)strlen(prev_lines->data);
        int limit = (line_ctr == 0 ? l : line_len);
        for (; j < limit; j++) {
            for (i = l, k = j; k < line_len && i < len; k++, i++) {
                if (prev_lines->data[k] != in[i])
                    break;
            }
            // Skip partial UTF-8 matches. k is kept from dropping below j so that a run of
            // continuation bytes cannot walk the index in front of the line buffer.
            while (k > j && (((unsigned char)prev_lines->data[k]) >> 6) == 2)
                k--;
            if ((k - j) >= NICE_LEN) {
                if (last_len) {
                    if (j > last_dist)
                        continue;
                    // Rewind to the position on entry. append_bits() ORs into a partially written
                    // byte, so the bits left there by the previous candidate must be cleared first.
                    *ol = last_ol;
                    if (last_ol % 8) {
                        const unsigned char keep = (unsigned char)(0xFF00 >> (last_ol % 8));
                        out[last_ol / 8] = (char)((unsigned char)out[last_ol / 8] & keep);
                    }
                }
                last_len = (k - j);
                last_dist = j;
                last_ctx = line_ctr;
                SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state));
                SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT]));
                SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_len - NICE_LEN));
                SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_dist));
                SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_ctx));
                j += last_len;
            }
        }
        line_ctr++;
        prev_lines = prev_lines->previous;
    } while (prev_lines && prev_lines->data != NULL);
    if (last_len) {
        // The caller's loop increments l again, hence the index of the last matched character
        l += last_len;
        l--;
        return l;
    }
    return -l;
}

/// Converts the hex digit ch ('0' to '9', 'A' to 'F' or 'a' to 'f') to its value 0-15 \n
/// placed in the upper 4 bits of the result, ready to be passed to append_bits(). \n
/// Any other character yields 0.
uint8_t getBaseCode(char ch)
{
    int nibble = 0;
    if (ch >= '0' && ch <= '9')
        nibble = ch - '0';
    else if (ch >= 'A' && ch <= 'F')
        nibble = ch - 'A' + 10;
    else if (ch >= 'a' && ch <= 'f')
        nibble = ch - 'a' + 10;
    return (uint8_t)(nibble << 4);
}

/// Enum indicating nibble type - USX_NIB_NUM means ch is a number '0' to '9', \n
/// USX_NIB_HEX_LOWER means ch is between 'a' to 'f', \n
/// USX_NIB_HEX_UPPER means ch is between 'A' to 'F', \n
/// USX_NIB_NOT means ch is none of these
enum { USX_NIB_NUM = 0, USX_NIB_HEX_LOWER, USX_NIB_HEX_UPPER, USX_NIB_NOT };
/// Classifies ch as one of the nibble types above
char getNibbleType(char ch)
{
    char kind = USX_NIB_NOT;
    if (ch >= 'A' && ch <= 'F')
        kind = USX_NIB_HEX_UPPER;
    else if (ch >= 'a' && ch <= 'f')
        kind = USX_NIB_HEX_LOWER;
    else if (ch >= '0' && ch <= '9')
        kind = USX_NIB_NUM;
    return kind;
}

/// Starts coding of nibble sets: writes the switch code for state, the USX_NUM horizontal code \n
/// and two 0 bits. Returns the new bit position, or a negative value when out is full.
int append_nibble_escape(char *out, int olen, int ol, uint8_t state, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[])
{
    ol = append_switch_code(out, olen, ol, state);
    if (ol < 0)
        return ol;
    ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]);
    if (ol < 0)
        return ol;
    return append_bits(out, olen, ol, 0, 2);
}

/// Returns the smaller of the two longs c and i
long min_of(long c, long i)
{
    if (c > i)
        return i;
    return c;
}

/// Appends the terminator to out and pads the last byte. \n
/// When usx_hcode_lens[USX_ALPHA] is non-zero the terminator is TERM_CODE, preceded by a switch to \n
/// the USX_NUM set unless state already is USX_NUM. Otherwise (preset 1) it is a run of 0 bits, \n
/// TERM_BYTE_PRESET_1_LEN_UPPER long when is_all_upper is set and TERM_BYTE_PRESET_1_LEN_LOWER long if not. \n
/// The unused bits of the final byte are then filled with copies of the last bit written. \n
/// Returns the new bit position, or a negative value when out is full.
int append_final_bits(char *const out, const int olen, int ol, const uint8_t state, const uint8_t is_all_upper,
                      const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[])
{
    if (usx_hcode_lens[USX_ALPHA]) {
        if (USX_NUM != state) {
            // for num state, append TERM_CODE directly
            // for other state, switch to Num Set first
            SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, state));
            SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM]));
        }
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_vcodes[TERM_CODE & 0x1F], usx_vcode_lens[TERM_CODE & 0x1F]));
    } else {
        // preset 1, terminate at 2 or 3 SW_CODE, i.e., 4 or 6 continuous 0 bits
        // see discussion: https://github.com/siara-cc/Unishox/issues/19#issuecomment-922435580
        SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, TERM_BYTE_PRESET_1,
                                          is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER));
    }

    // fill uint8_t with the last bit
    // The bit is read through unsigned char: testing the sign of a promoted plain char depends on
    // whether char is signed on the target and never looks at the bit that was written last.
    uint8_t fill = 0;
    if (ol > 0) {
        const int last = ol - 1;
        if ((((unsigned char)out[last / 8]) >> (7 - (last & 7))) & 1)
            fill = 0xFF;
    }
    SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, fill, (8 - ol % 8) & 7));

    return ol;
}

/// Macro used in the main compress function so that if the output len exceeds given maximum length (olen) it can exit
/// exp is evaluated once; if its value is negative the enclosing function returns olen + 1 when the olen passed
/// to the macro is not negative, and (1 - olen) * 4 when it is negative. unishox2_compress_lines() passes the
/// caller's original olen here, where a negative value requests full terminator codes.
/// NOTE(review): __olen is an identifier reserved for the implementation (double underscore) - consider renaming.
#define SAFE_APPEND_BITS2(olen, exp)                                                                                             \
    do {                                                                                                                         \
        const int newidx = (exp);                                                                                                \
        const int __olen = (olen);                                                                                               \
        if (newidx < 0)                                                                                                          \
            return __olen >= 0 ? __olen + 1 : (1 - __olen) * 4;                                                                  \
    } while (0)

// Main API function. See unishox2.h for documentation
int unishox2_compress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[],
                            const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[],
                            struct us_lnk_lst *prev_lines)
{

    uint8_t state;

    int l, ll, ol;
    char c_in, c_next;
    int prev_uni;
    uint8_t is_upper, is_all_upper;
#if (UNISHOX_API_OUT_AND_LEN(0, 1)) == 0
    const int olen = INT_MAX - 1;
    const int rawolen = olen;
    const uint8_t need_full_term_codes = 0;
#else
    const int rawolen = olen;
    uint8_t need_full_term_codes = 0;
    if (olen < 0) {
        need_full_term_codes = 1;
        olen *= -1;
    }
#endif

    init_coder();
    ol = 0;
    prev_uni = 0;
    state = USX_ALPHA;
    is_all_upper = 0;
    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNISHOX_MAGIC_BITS, UNISHOX_MAGIC_BIT_LEN)); // magic bit(s)
    for (l = 0; l < len; l++) {

        if (usx_hcode_lens[USX_DICT] && l < (len - NICE_LEN + 1)) {
            if (prev_lines) {
                l = matchLine(in, len, l, out, olen, &ol, prev_lines, &state, usx_hcodes, usx_hcode_lens);
                if (l > 0) {
                    continue;
                } else if (l < 0 && ol < 0) {
                    return olen + 1;
                }
                l = -l;
            } else {
                l = matchOccurance(in, len, l, out, olen, &ol, &state, usx_hcodes, usx_hcode_lens);
                if (l > 0) {
                    continue;
                } else if (l < 0 && ol < 0) {
                    return olen + 1;
                }
                l = -l;
            }
        }

        c_in = in[l];
        if (l && len > 4 && l < (len - 4) && usx_hcode_lens[USX_NUM]) {
            if (c_in == in[l - 1] && c_in == in[l + 1] && c_in == in[l + 2] && c_in == in[l + 3]) {
                int rpt_count = l + 4;
                while (rpt_count < len && in[rpt_count] == c_in)
                    rpt_count++;
                rpt_count -= l;
                SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, RPT_CODE, &state, usx_hcodes, usx_hcode_lens));
                SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rpt_count - 4));
                l += rpt_count;
                l--;
                continue;
            }
        }

        if (l <= (len - 36) && usx_hcode_lens[USX_NUM]) {
            if (in[l + 8] == '-' && in[l + 13] == '-' && in[l + 18] == '-' && in[l + 23] == '-') {
                char hex_type = USX_NIB_NUM;
                int uid_pos = l;
                for (; uid_pos < l + 36; uid_pos++) {
                    char c_uid = in[uid_pos];
                    if (c_uid == '-' && (uid_pos == 8 || uid_pos == 13 || uid_pos == 18 || uid_pos == 23))
                        continue;
                    char nib_type = getNibbleType(c_uid);
                    if (nib_type == USX_NIB_NOT)
                        break;
                    if (nib_type != USX_NIB_NUM) {
                        if (hex_type != USX_NIB_NUM && hex_type != nib_type)
                            break;
                        hex_type = nib_type;
                    }
                }
                if (uid_pos == l + 36) {
                    SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0xC0 : 0xF0),
                                                                (hex_type == USX_NIB_HEX_LOWER ? 3 : 5)));
                    for (uid_pos = l; uid_pos < l + 36; uid_pos++) {
                        char c_uid = in[uid_pos];
                        if (c_uid != '-')
                            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(c_uid), 4));
                    }
                    // printf("GUID:\n");
                    l += 35;
                    continue;
                }
            }
        }

        if (l < (len - 5) && usx_hcode_lens[USX_NUM]) {
            char hex_type = USX_NIB_NUM;
            int hex_len = 0;
            do {
                char nib_type = getNibbleType(in[l + hex_len]);
                if (nib_type == USX_NIB_NOT)
                    break;
                if (nib_type != USX_NIB_NUM) {
                    if (hex_type != USX_NIB_NUM && hex_type != nib_type)
                        break;
                    hex_type = nib_type;
                }
                hex_len++;
            } while (l + hex_len < len);
            if (hex_len > 10 && hex_type == USX_NIB_NUM)
                hex_type = USX_NIB_HEX_LOWER;
            if ((hex_type == USX_NIB_HEX_LOWER || hex_type == USX_NIB_HEX_UPPER) && hex_len > 3) {
                SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0x80 : 0xE0),
                                                            (hex_type == USX_NIB_HEX_LOWER ? 2 : 4)));
                SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, hex_len));
                do {
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l++]), 4));
                } while (--hex_len);
                l--;
                continue;
            }
        }

        if (usx_templates != NULL) {
            int i;
            for (i = 0; i < 5; i++) {
                if (usx_templates[i]) {
                    int rem = (int)strlen(usx_templates[i]);
                    int j = 0;
                    for (; j < rem && l + j < len; j++) {
                        char c_t = usx_templates[i][j];
                        c_in = in[l + j];
                        if (c_t == 'f' || c_t == 'F') {
                            if (getNibbleType(c_in) != (c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER) &&
                                getNibbleType(c_in) != USX_NIB_NUM) {
                                break;
                            }
                        } else if (c_t == 'r' || c_t == 't' || c_t == 'o') {
                            if (c_in < '0' || c_in > (c_t == 'r' ? '7' : (c_t == 't' ? '3' : '1')))
                                break;
                        } else if (c_t != c_in)
                            break;
                    }
                    if (((float)j / rem) > 0.66) {
                        // printf("%s\n", usx_templates[i]);
                        rem = rem - j;
                        SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
                        SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0, 1));
                        SAFE_APPEND_BITS2(rawolen,
                                          ol = append_bits(out, olen, ol, (count_codes[i] & 0xF8), count_codes[i] & 0x07));
                        SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rem));
                        for (int k = 0; k < j; k++) {
                            char c_t = usx_templates[i][k];
                            if (c_t == 'f' || c_t == 'F')
                                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l + k]), 4));
                            else if (c_t == 'r' || c_t == 't' || c_t == 'o') {
                                c_t = (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1));
                                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (in[l + k] - '0') << (8 - c_t), c_t));
                            }
                        }
                        l += j;
                        l--;
                        break;
                    }
                }
            }
            if (i < 5)
                continue;
        }

        if (usx_freq_seq != NULL) {
            int i;
            for (i = 0; i < 6; i++) {
                int seq_len = (int)strlen(usx_freq_seq[i]);
                if (len - seq_len >= 0 && l <= len - seq_len) {
                    if (memcmp(usx_freq_seq[i], in + l, seq_len) == 0 && usx_hcode_lens[usx_freq_codes[i] >> 5]) {
                        SAFE_APPEND_BITS2(rawolen,
                                          ol = append_code(out, olen, ol, usx_freq_codes[i], &state, usx_hcodes, usx_hcode_lens));
                        l += seq_len;
                        l--;
                        break;
                    }
                }
            }
            if (i < 6)
                continue;
        }

        c_in = in[l];

        is_upper = 0;
        if (c_in >= 'A' && c_in <= 'Z')
            is_upper = 1;
        else {
            if (is_all_upper) {
                is_all_upper = 0;
                SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
                state = USX_ALPHA;
            }
        }
        if (is_upper && !is_all_upper) {
            if (state == USX_NUM) {
                SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
                state = USX_ALPHA;
            }
            SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
            SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
            if (state == USX_DELTA) {
                state = USX_ALPHA;
                SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
            }
        }
        c_next = 0;
        if (l + 1 < len)
            c_next = in[l + 1];

        if (c_in >= 32 && c_in <= 126) {
            if (is_upper && !is_all_upper) {
                for (ll = l + 4; ll >= l && ll < len; ll--) {
                    if (in[ll] < 'A' || in[ll] > 'Z')
                        break;
                }
                if (ll == l - 1) {
                    SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
                    state = USX_ALPHA;
                    is_all_upper = 1;
                }
            }
            if (state == USX_DELTA && (c_in == ' ' || c_in == '.' || c_in == ',')) {
                uint8_t spl_code = (c_in == ',' ? 0xC0 : (c_in == '.' ? 0xE0 : (c_in == ' ' ? 0 : 0xFF)));
                if (spl_code != 0xFF) {
                    uint8_t spl_code_len = (c_in == ',' ? 3 : (c_in == '.' ? 4 : (c_in == ' ' ? 1 : 4)));
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN));
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, spl_code, spl_code_len));
                    continue;
                }
            }
            c_in -= 32;
            if (is_all_upper && is_upper)
                c_in += 32;
            if (c_in == 0) {
                if (state == USX_NUM)
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[NUM_SPC_CODE & 0x1F],
                                                                usx_vcode_lens[NUM_SPC_CODE & 0x1F]));
                else
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1]));
            } else {
                c_in--;
                SAFE_APPEND_BITS2(rawolen,
                                  ol = append_code(out, olen, ol, usx_code_94[(int)c_in], &state, usx_hcodes, usx_hcode_lens));
            }
        } else if (c_in == 13 && c_next == 10) {
            SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CRLF_CODE, &state, usx_hcodes, usx_hcode_lens));
            l++;
        } else if (c_in == 10) {
            if (state == USX_DELTA) {
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF0, 4));
            } else
                SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, LF_CODE, &state, usx_hcodes, usx_hcode_lens));
        } else if (c_in == 13) {
            SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CR_CODE, &state, usx_hcodes, usx_hcode_lens));
        } else if (c_in == '\t') {
            SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, TAB_CODE, &state, usx_hcodes, usx_hcode_lens));
        } else {
            int utf8len;
            int32_t uni = readUTF8(in, len, l, &utf8len);
            if (uni) {
                l += utf8len;
                if (state != USX_DELTA) {
                    int32_t uni2 = readUTF8(in, len, l, &utf8len);
                    if (uni2) {
                        if (state != USX_ALPHA) {
                            SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                            SAFE_APPEND_BITS2(rawolen,
                                              ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
                        }
                        SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                        SAFE_APPEND_BITS2(rawolen,
                                          ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA]));
                        SAFE_APPEND_BITS2(
                            rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1])); // code for space (' ')
                        state = USX_DELTA;
                    } else {
                        SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state));
                        SAFE_APPEND_BITS2(rawolen,
                                          ol = append_bits(out, olen, ol, usx_hcodes[USX_DELTA], usx_hcode_lens[USX_DELTA]));
                    }
                }
                SAFE_APPEND_BITS2(rawolen, ol = encodeUnicode(out, olen, ol, uni, prev_uni));
                // printf("%d:%d:%d\n", l, utf8len, uni);
                prev_uni = uni;
                l--;
            } else {
                int bin_count = 1;
                for (int bi = l + 1; bi < len; bi++) {
                    char c_bi = in[bi];
                    // if (c_bi > 0x1F && c_bi != 0x7F)
                    //   break;
                    if (readUTF8(in, len, bi, &utf8len))
                        break;
                    if (bi < (len - 4) && c_bi == in[bi - 1] && c_bi == in[bi + 1] && c_bi == in[bi + 2] && c_bi == in[bi + 3])
                        break;
                    bin_count++;
                }
                // printf("Bin:%d:%d:%x:%d\n", l, (unsigned char) c_in, (unsigned char) c_in, bin_count);
                SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens));
                SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF8, 5));
                SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, bin_count));
                do {
                    SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, in[l++], 8));
                } while (--bin_count);
                l--;
            }
        }
    }

    if (need_full_term_codes) {
        const int orig_ol = ol;
        SAFE_APPEND_BITS2(rawolen, ol = append_final_bits(out, olen, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens));
        return (ol / 8) * 4 + (((ol - orig_ol) / 8) & 3);
    } else {
        const int rst = (ol + 7) / 8;
        append_final_bits(out, rst, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens);
        return rst;
    }
}

/// Main API function (documented in unishox2.h). \n
/// Compresses a single buffer by forwarding every argument unchanged to \n
/// unishox2_compress_lines() with NULL as the previous-lines list.
int unishox2_compress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[],
                      const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[])
{
    const int compressed_len = unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes,
                                                       usx_hcode_lens, usx_freq_seq, usx_templates, NULL);
    return compressed_len;
}

/// Main API function (documented in unishox2.h). \n
/// Convenience wrapper around unishox2_compress_lines() that uses the default \n
/// horizontal codes, frequent sequences and templates, and no previous lines. \n
/// The output limit handed down is INT_MAX - 1, so no practical bound on out is enforced here.
int unishox2_compress_simple(const char *in, int len, char *out)
{
    const int compressed_len = unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_HCODES_DFLT,
                                                       USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES, NULL);
    return compressed_len;
}

/// Tests a single bit of the stream at in, counting bits MSB-first within each byte. \n
/// Returns 0 when the bit is clear, and the (non-zero) bit mask itself when it is set. \n
/// No bounds check is performed; the caller must make sure bit_no lies inside the buffer.
int readBit(const char *in, int bit_no)
{
    const int byte_idx = bit_no >> 3;
    const int bit_mask = 0x80 >> (bit_no % 8);
    return in[byte_idx] & bit_mask;
}

/// Returns the 8 bits of the stream that start at bit position bit_no (0..255). \n
/// len is the stream length in bits. When the window spills into a byte that lies \n
/// at or beyond len / 8, the missing low bits are filled with 1s instead of being read.
int read8bitCode(const char *in, int len, int bit_no)
{
    const int shift = bit_no & 0x07;
    const int first = bit_no >> 3;
    const int second = first + 1;
    const int byte_count = len >> 3;

    // High part: remaining bits of the first byte, moved to the top of the window.
    const uint8_t high = (uint8_t)(((uint8_t)in[first]) << shift);
    // Low part: leading bits of the following byte, or all ones past the end of input.
    const uint8_t filler = (second < byte_count) ? (uint8_t)in[second] : (uint8_t)0xFF;
    const uint8_t low = (uint8_t)(filler >> (8 - shift));

    return (uint8_t)(high | low);
}

/// The list of vertical codes is split into 5 sections. Used by readVCodeIdx()
#define SECTION_COUNT 5
/// Used by readVCodeIdx() for finding the section under which the code read using read8bitCode() falls. \n
/// Entry i is the largest 8-bit value belonging to section i (the test is code <= usx_vsections[i]).
uint8_t usx_vsections[] = {0x7F, 0xBF, 0xDF, 0xEF, 0xFF};
/// Used by readVCodeIdx() for finding the section vertical position offset, \n
/// i.e. the index in usx_vcode_lookup at which section i starts
uint8_t usx_vsection_pos[] = {0, 4, 8, 12, 20};
/// Used by readVCodeIdx() for masking the code read by read8bitCode()
uint8_t usx_vsection_mask[] = {0x7F, 0x3F, 0x1F, 0x0F, 0x0F};
/// Used by readVCodeIdx() for shifting the (masked) code read by read8bitCode() to obtain the vpos \n
/// within the section
uint8_t usx_vsection_shift[] = {5, 4, 3, 1, 0};

/// Vertical decoder lookup table - each entry packs 3 bits of code len (upper bits) \n
/// and 5 bits of vertical pos (lower bits). \n
/// The stored code len is one less than the real length, as 8 cannot be accommodated in 3 bits; \n
/// readVCodeIdx() adds the 1 back. \n
/// Sections start at the offsets given by usx_vsection_pos (0, 4, 8, 12, 20).
uint8_t usx_vcode_lookup[36] = {(1 << 5) + 0,  (1 << 5) + 0,  (2 << 5) + 1,  (2 << 5) + 2,  // Section 1
                                (3 << 5) + 3,  (3 << 5) + 4,  (3 << 5) + 5,  (3 << 5) + 6,  // Section 2
                                (3 << 5) + 7,  (3 << 5) + 7,  (4 << 5) + 8,  (4 << 5) + 9,  // Section 3
                                (5 << 5) + 10, (5 << 5) + 10, (5 << 5) + 11, (5 << 5) + 11, // Section 4
                                (5 << 5) + 12, (5 << 5) + 12, (6 << 5) + 13, (6 << 5) + 14, (6 << 5) + 15, (6 << 5) + 15,
                                (6 << 5) + 16, (6 << 5) + 16, // Section 5
                                (6 << 5) + 17, (6 << 5) + 17, (7 << 5) + 18, (7 << 5) + 19, (7 << 5) + 20, (7 << 5) + 21,
                                (7 << 5) + 22, (7 << 5) + 23, (7 << 5) + 24, (7 << 5) + 25, (7 << 5) + 26, (7 << 5) + 27};

/// Decodes one vertical code from the bit stream at in, starting at *bit_no_p. \n
/// The next 8 bits are classified into one of SECTION_COUNT sections and then looked up \n
/// in the 36 entry usx_vcode_lookup table (rather than a 256 entry table), trading speed \n
/// for a smaller memory footprint. \n
/// len is the stream length in bits. \n
/// Returns the vertical code index, or 99 if the stream is exhausted or the code would \n
/// run past len. \n
/// *bit_no_p is advanced by the number of bits the code occupies (also when 99 is \n
/// returned because the code overruns len).
int readVCodeIdx(const char *in, int len, int *bit_no_p)
{
    if (*bit_no_p >= len)
        return 99;

    const uint8_t code = read8bitCode(in, len, *bit_no_p);
    for (int sec = 0; sec < SECTION_COUNT; sec++) {
        if (code > usx_vsections[sec])
            continue; // belongs to a later section

        const int slot = usx_vsection_pos[sec] + ((code & usx_vsection_mask[sec]) >> usx_vsection_shift[sec]);
        const uint8_t entry = usx_vcode_lookup[slot];
        // upper 3 bits hold (code length - 1)
        *bit_no_p += (entry >> 5) + 1;
        if (*bit_no_p > len)
            return 99;
        return entry & 0x1F;
    }
    return 99;
}

/// Mask for retrieving each code to be decoded according to its length: \n
/// len_masks[n - 1] keeps the n most significant bits of a byte. \n
/// Same as usx_mask so redundant
uint8_t len_masks[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
/// Decodes one horizontal code from the bit stream at in, starting at *bit_no_p, \n
/// using the code table given by usx_hcodes / usx_hcode_lens (a length of 0 disables a set). \n
/// If the USX_ALPHA code has length 0, USX_ALPHA is returned without consuming any bits. \n
/// len is the stream length in bits. \n
/// Returns the index of the first matching horizontal code (0..4) and advances *bit_no_p \n
/// by its length, or returns 99 if the stream is exhausted or nothing matches.
int readHCodeIdx(const char *in, int len, int *bit_no_p, const uint8_t usx_hcodes[], const uint8_t usx_hcode_lens[])
{
    if (usx_hcode_lens[USX_ALPHA] == 0)
        return USX_ALPHA;
    if (*bit_no_p >= len)
        return 99;

    const uint8_t code = read8bitCode(in, len, *bit_no_p);
    int set = 0;
    while (set < 5) {
        const uint8_t hlen = usx_hcode_lens[set];
        // compare only the leading hlen bits of the window against this set's code
        if (hlen != 0 && (code & len_masks[hlen - 1]) == usx_hcodes[set]) {
            *bit_no_p += hlen;
            return set;
        }
        set++;
    }
    return 99;
}

// TODO: Last value check.. Also len check in readBit
/// Reads a step code (0, 10, 110, ...) from the stream and returns the number of leading 1 bits. \n
/// Counting stops early, without consuming a terminating 0, once limit ones have been read. \n
/// Returns 99 when the end of the stream (len, in bits) is reached before the code is complete. \n
/// *bit_no_p is advanced past every bit consumed.
int getStepCodeIdx(const char *in, int len, int *bit_no_p, int limit)
{
    int ones = 0;
    for (;;) {
        if (*bit_no_p >= len)
            return 99;
        if (!readBit(in, *bit_no_p))
            break; // terminating 0 found
        ones++;
        (*bit_no_p)++;
        if (ones == limit)
            return ones;
    }
    (*bit_no_p)++; // skip the terminating 0
    return ones;
}

/// Builds an integer from count bits of the stream starting at bit_no, most significant bit first. \n
/// len is the stream length in bits. The caller's bit position is not modified. \n
/// Returns the value read, or -1 if the stream ends before count bits are available.
int32_t getNumFromBits(const char *in, int len, int bit_no, int count)
{
    int32_t value = 0;
    int bits_left = count;
    while (bits_left != 0 && bit_no < len) {
        bits_left--;
        if (readBit(in, bit_no))
            value += 1 << bits_left;
        bit_no++;
    }
    // bits still outstanding means the input ran out first
    return bits_left > 0 ? -1 : value;
}

/// Decodes a count from the bit stream at in and advances *bit_no_p past it. \n
/// A step code (at most 4 ones) selects the field width from count_bit_lens and, \n
/// for every step but the first, a base value from count_adder that is added to the field. \n
/// len is the stream length in bits. \n
/// Returns the decoded count, or -1 if the stream is too short.
int32_t readCount(const char *in, int *bit_no_p, int len)
{
    const int step = getStepCodeIdx(in, len, bit_no_p, 4);
    if (step == 99)
        return -1;

    const int nbits = count_bit_lens[step];
    if (*bit_no_p + nbits - 1 >= len)
        return -1;

    const int32_t base = step ? count_adder[step - 1] : 0;
    const int32_t value = getNumFromBits(in, len, *bit_no_p, nbits) + base;
    *bit_no_p += nbits;
    return value;
}

/// Decodes a signed Unicode delta from the bit stream at in and advances *bit_no_p. \n
/// A step code (at most 5 ones) selects the field width (uni_bit_len) and base (uni_adder); \n
/// it is followed by one sign bit and the magnitude field. \n
/// When the step code is 5, a second step code (at most 4 ones) is read and returned as \n
/// a special code: 0x7FFFFF00 + that index. \n
/// 0x7FFFFF00 + 99 is returned when the stream ends prematurely. \n
/// len is the stream length in bits.
int32_t readUnicode(const char *in, int *bit_no_p, int len)
{
    int step = getStepCodeIdx(in, len, bit_no_p, 5);
    if (step == 99)
        return 0x7FFFFF00 + 99;
    if (step == 5) {
        step = getStepCodeIdx(in, len, bit_no_p, 4);
        return 0x7FFFFF00 + step;
    }
    if (step < 0)
        return 0;

    const int negative = (*bit_no_p < len) ? readBit(in, *bit_no_p) : 0;
    (*bit_no_p)++;

    const int nbits = uni_bit_len[step];
    if (*bit_no_p + nbits - 1 >= len)
        return 0x7FFFFF00 + 99;

    int32_t magnitude = getNumFromBits(in, len, *bit_no_p, nbits);
    magnitude += uni_adder[step];
    *bit_no_p += nbits;
    // printf("Sign: %d, Val:%d", negative, magnitude);
    if (negative)
        return -magnitude;
    return magnitude;
}

/// Macro to ensure that the decoder does not append more than olen bytes to out. \n
/// Stores byte c at out[ol] when 0 <= ol < olen. \n
/// Otherwise it executes a `return` statement in the ENCLOSING function: \n
///   - olen + 1 when ol >= olen (output buffer exhausted) \n
///   - 0 when ol is negative \n
/// It can therefore only be used inside functions returning int. \n
/// out, olen and ol are each evaluated exactly once; c is evaluated only when the byte is stored.
#define DEC_OUTPUT_CHAR(out, olen, ol, c)                                                                                        \
    do {                                                                                                                         \
        char *const obuf = (out);                                                                                                \
        const int oidx = (ol);                                                                                                   \
        const int limit = (olen);                                                                                                \
        if (limit <= oidx)                                                                                                       \
            return limit + 1;                                                                                                    \
        else if (oidx < 0)                                                                                                       \
            return 0;                                                                                                            \
        else                                                                                                                     \
            obuf[oidx] = (c);                                                                                                    \
    } while (0)

/// Macro to ensure that the decoder does not append more than olen bytes to out. \n
/// Evaluates exp once (callers pass an assignment yielding the new output index) and, \n
/// if the result is greater than olen, executes `return olen + 1;` in the ENCLOSING function. \n
/// It can therefore only be used inside functions returning int.
#define DEC_OUTPUT_CHARS(olen, exp)                                                                                              \
    do {                                                                                                                         \
        const int newidx = (exp);                                                                                                \
        const int limit = (olen);                                                                                                \
        if (newidx > limit)                                                                                                      \
            return limit + 1;                                                                                                    \
    } while (0)

/// Writes the code point uni to out, starting at index ol, as a 2, 3 or 4 byte UTF-8 sequence \n
/// (2 bytes below 1 << 11, 3 bytes below 1 << 16, 4 bytes otherwise). \n
/// Returns the output index after the sequence. \n
/// Because DEC_OUTPUT_CHAR is used for every byte, olen + 1 is returned instead as soon as \n
/// a byte does not fit (bytes written before that point stay in out), and 0 if ol is negative.
int writeUTF8(char *out, int olen, int ol, int uni)
{
    int lead;      // first byte of the sequence
    int trailing;  // number of continuation bytes that follow it

    if (uni < (1 << 11)) {
        lead = 0xC0 + (uni >> 6);
        trailing = 1;
    } else if (uni < (1 << 16)) {
        lead = 0xE0 + (uni >> 12);
        trailing = 2;
    } else {
        lead = 0xF0 + (uni >> 18);
        trailing = 3;
    }

    DEC_OUTPUT_CHAR(out, olen, ol++, lead);
    // continuation bytes carry 6 payload bits each, most significant group first
    while (trailing--)
        DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> (6 * trailing)) & 0x3F));
    return ol;
}

/// Decodes a repeating sequence reference from the bit stream and appends the bytes to out at ol. \n
/// The reference always starts with a length (stored minus NICE_LEN). \n
///  - With prev_lines: a distance and a context count follow; the bytes are taken from the data \n
///    of the line reached by following `previous` context-count times, starting at distance. \n
///  - Without prev_lines: a distance (stored minus NICE_LEN - 1) follows; the bytes are taken from \n
///    out itself, distance bytes before ol. \n
/// Returns the new output index, -1 on an invalid or truncated reference, or olen + 1 when \n
/// the output buffer is too small (as many bytes as fit are still copied in that case). \n
/// *bit_no is advanced past whatever was read. len is the stream length in bits.
int decodeRepeat(const char *in, int len, char *out, int olen, int ol, int *bit_no, struct us_lnk_lst *prev_lines)
{
    const int32_t match_len = readCount(in, bit_no, len) + NICE_LEN;
    if (match_len < NICE_LEN)
        return -1; // readCount() failed

    const int32_t room = olen - ol;
    const char *src;

    if (prev_lines != NULL) {
        const int32_t offset = readCount(in, bit_no, len);
        if (offset < 0)
            return -1;
        int32_t hops = readCount(in, bit_no, len);
        if (hops < 0)
            return -1;

        // walk back `hops` lines from the most recent one
        struct us_lnk_lst *line = prev_lines;
        while (hops != 0 && line != NULL) {
            line = line->previous;
            hops--;
        }
        if (line == NULL)
            return -1;
        if (room <= 0)
            return olen + 1;
        // NOTE(review): only the start offset is validated against strlen(); offset + match_len
        // is not checked here - confirm the referenced line buffers are always large enough.
        if ((size_t)offset >= strlen(line->data))
            return -1;
        src = line->data + offset;
    } else {
        const int32_t back = readCount(in, bit_no, len) + NICE_LEN - 1;
        if (back < NICE_LEN - 1)
            return -1; // readCount() failed
        // printf("Decode len: %d, dist: %d\n", match_len - NICE_LEN, back - NICE_LEN + 1);
        if (room <= 0)
            return olen + 1;
        if (ol - back < 0)
            return -1; // would point before the start of out
        src = out + ol - back;
    }

    memmove(out + ol, src, min_of(room, match_len));
    if (room < match_len)
        return olen + 1;
    return ol + match_len;
}

/// Maps a 4 bit nibble to its hexadecimal character. \n
/// 0..9 always give '0'..'9'; other values are offset from 'a' when hex_type is below \n
/// USX_NIB_HEX_UPPER, and from 'A' otherwise.
char getHexChar(int32_t nibble, int hex_type)
{
    if (nibble >= 0 && nibble <= 9)
        return '0' + nibble;

    const char letter_base = (hex_type < USX_NIB_HEX_UPPER) ? 'a' : 'A';
    return letter_base + nibble - 10;
}

// Main API function. See unishox2.h for documentation
/// Decompresses the Unishox2 bit stream of len bytes at in into out. \n
/// usx_hcodes / usx_hcode_lens describe the horizontal codes, usx_freq_seq the six frequent \n
/// sequences and usx_templates the templates that were used for compression. \n
/// usx_freq_seq and usx_templates may be NULL (the compressor accepts NULL for usx_freq_seq too); \n
/// a stream that then refers to a frequent sequence or a template is treated as invalid \n
/// and decoding stops at that point instead of dereferencing the NULL table. \n
/// prev_lines, when not NULL, is the list of earlier lines that repeat references point into. \n
/// Returns the number of bytes written to out. Decoding stops silently (returning what has been \n
/// produced so far) at a terminator or at truncated / invalid input. \n
/// olen + 1 is returned when out is too small, and 0 for a repeat-character code that has no \n
/// preceding output byte.
int unishox2_decompress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[],
                              const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[],
                              struct us_lnk_lst *prev_lines)
{

    int dstate; // set that stays selected between characters
    int bit_no; // current read position, in bits
    int h, v;   // horizontal (set) and vertical (position) code of the current character
    uint8_t is_all_upper;
#if (UNISHOX_API_OUT_AND_LEN(0, 1)) == 0
    const int olen = INT_MAX - 1;
#endif

    init_coder();
    int ol = 0;
    bit_no = UNISHOX_MAGIC_BIT_LEN; // ignore the magic bit
    dstate = h = USX_ALPHA;
    is_all_upper = 0;

    int prev_uni = 0;

    len <<= 3; // from here on len is the input length in bits
    while (bit_no < len) {
        int orig_bit_no = bit_no;
        if (dstate == USX_DELTA || h == USX_DELTA) {
            if (dstate != USX_DELTA)
                h = dstate;
            int32_t delta = readUnicode(in, &bit_no, len);
            if ((delta >> 8) == 0x7FFFFF) {
                // not a delta: the low byte carries a special code
                int spl_code_idx = delta & 0x000000FF;
                if (spl_code_idx == 99)
                    break;
                switch (spl_code_idx) {
                case 0:
                    DEC_OUTPUT_CHAR(out, olen, ol++, ' ');
                    continue;
                case 1:
                    h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
                    if (h == 99) {
                        bit_no = len;
                        continue;
                    }
                    if (h == USX_DELTA || h == USX_ALPHA) {
                        dstate = h;
                        continue;
                    }
                    if (h == USX_DICT) {
                        int rpt_ret = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines);
                        if (rpt_ret < 0)
                            return ol; // if we break here it will only break out of switch
                        DEC_OUTPUT_CHARS(olen, ol = rpt_ret);
                        h = dstate;
                        continue;
                    }
                    break;
                case 2:
                    DEC_OUTPUT_CHAR(out, olen, ol++, ',');
                    continue;
                case 3:
                    DEC_OUTPUT_CHAR(out, olen, ol++, '.');
                    continue;
                case 4:
                    DEC_OUTPUT_CHAR(out, olen, ol++, 10);
                    continue;
                }
            } else {
                prev_uni += delta;
                DEC_OUTPUT_CHARS(olen, ol = writeUTF8(out, olen, ol, prev_uni));
                // printf("%ld, ", prev_uni);
            }
            if (dstate == USX_DELTA && h == USX_DELTA)
                continue;
        } else
            h = dstate;
        char c = 0;
        uint8_t is_upper = is_all_upper;
        v = readVCodeIdx(in, len, &bit_no);
        if (v == 99 || h == 99) {
            bit_no = orig_bit_no;
            break;
        }
        if (v == 0 && h != USX_SYM) {
            // vertical code 0 outside the symbol set is followed by a horizontal code
            if (bit_no >= len)
                break;
            if (h != USX_NUM || dstate != USX_DELTA) {
                h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
                if (h == 99 || bit_no >= len) {
                    bit_no = orig_bit_no;
                    break;
                }
            }
            if (h == USX_ALPHA) {
                if (dstate == USX_ALPHA) {
                    if (!usx_hcode_lens[USX_ALPHA] &&
                        TERM_BYTE_PRESET_1 ==
                            (read8bitCode(in, len, bit_no - SW_CODE_LEN) &
                             (0xFF << (8 - (is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER)))))
                        break; // Terminator for preset 1
                    if (is_all_upper) {
                        is_upper = is_all_upper = 0;
                        continue;
                    }
                    v = readVCodeIdx(in, len, &bit_no);
                    if (v == 99) {
                        bit_no = orig_bit_no;
                        break;
                    }
                    if (v == 0) {
                        h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens);
                        if (h == 99) {
                            bit_no = orig_bit_no;
                            break;
                        }
                        if (h == USX_ALPHA) {
                            is_all_upper = 1;
                            continue;
                        }
                    }
                    is_upper = 1;
                } else {
                    dstate = USX_ALPHA;
                    continue;
                }
            } else if (h == USX_DICT) {
                int rpt_ret = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines);
                if (rpt_ret < 0)
                    break;
                DEC_OUTPUT_CHARS(olen, ol = rpt_ret);
                continue;
            } else if (h == USX_DELTA) {
                // printf("Sign: %d, bitno: %d\n", sign, bit_no);
                // printf("Code: %d\n", prev_uni);
                // printf("BitNo: %d\n", bit_no);
                continue;
            } else {
                if (h != USX_NUM || dstate != USX_DELTA)
                    v = readVCodeIdx(in, len, &bit_no);
                if (v == 99) {
                    bit_no = orig_bit_no;
                    break;
                }
                if (h == USX_NUM && v == 0) {
                    // step code selects: 0 = template, 5 = raw bytes, anything else = hex nibbles
                    int idx = getStepCodeIdx(in, len, &bit_no, 5);
                    if (idx == 99)
                        break;
                    if (idx == 0) {
                        idx = getStepCodeIdx(in, len, &bit_no, 4);
                        if (idx >= 5)
                            break;
                        int32_t rem = readCount(in, &bit_no, len);
                        if (rem < 0)
                            break;
                        // a template code without a template table (or entry) cannot be decoded
                        if (usx_templates == NULL || usx_templates[idx] == NULL)
                            break;
                        size_t tlen = strlen(usx_templates[idx]);
                        if ((size_t)rem > tlen)
                            break;
                        rem = tlen - rem; // number of template characters actually encoded
                        int eof = 0;
                        for (int j = 0; j < rem; j++) {
                            char c_t = usx_templates[idx][j];
                            if (c_t == 'f' || c_t == 'r' || c_t == 't' || c_t == 'o' || c_t == 'F') {
                                char nibble_len = (c_t == 'f' || c_t == 'F' ? 4 : (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1)));
                                const int32_t raw_char = getNumFromBits(in, len, bit_no, nibble_len);
                                if (raw_char < 0) {
                                    eof = 1;
                                    break;
                                }
                                DEC_OUTPUT_CHAR(out, olen, ol++,
                                                getHexChar((char)raw_char, c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER));
                                bit_no += nibble_len;
                            } else
                                DEC_OUTPUT_CHAR(out, olen, ol++, c_t);
                        }
                        if (eof)
                            break; // reach input eof
                    } else if (idx == 5) {
                        int32_t bin_count = readCount(in, &bit_no, len);
                        if (bin_count < 0)
                            break;
                        if (bin_count == 0) // invalid encoding
                            break;
                        do {
                            const int32_t raw_char = getNumFromBits(in, len, bit_no, 8);
                            if (raw_char < 0)
                                break;
                            DEC_OUTPUT_CHAR(out, olen, ol++, (char)raw_char);
                            bit_no += 8;
                        } while (--bin_count);
                        if (bin_count > 0)
                            break; // reach input eof
                    } else {
                        int32_t nibble_count = 0;
                        if (idx == 2 || idx == 4)
                            nibble_count = 32; // fixed 32 nibbles, dashes inserted below
                        else {
                            nibble_count = readCount(in, &bit_no, len);
                            if (nibble_count < 0)
                                break;
                            if (nibble_count == 0) // invalid encoding
                                break;
                        }
                        do {
                            int32_t nibble = getNumFromBits(in, len, bit_no, 4);
                            if (nibble < 0)
                                break;
                            DEC_OUTPUT_CHAR(out, olen, ol++, getHexChar(nibble, idx < 3 ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER));
                            if ((idx == 2 || idx == 4) &&
                                (nibble_count == 25 || nibble_count == 21 || nibble_count == 17 || nibble_count == 13))
                                DEC_OUTPUT_CHAR(out, olen, ol++, '-');
                            bit_no += 4;
                        } while (--nibble_count);
                        if (nibble_count > 0)
                            break; // reach input eof
                    }
                    if (dstate == USX_DELTA)
                        h = USX_DELTA;
                    continue;
                }
            }
        }
        if (is_upper && v == 1) {
            h = dstate = USX_DELTA; // continuous delta coding
            continue;
        }
        if (h < 3 && v < 28)
            c = usx_sets[h][v];
        if (c >= 'a' && c <= 'z') {
            dstate = USX_ALPHA;
            if (is_upper)
                c -= 32;
        } else {
            if (c >= '0' && c <= '9') {
                dstate = USX_NUM;
            } else if (c == 0) {
                // no plain character at this position: it is one of the special codes
                if (v == 8) {
                    DEC_OUTPUT_CHAR(out, olen, ol++, '\r');
                    DEC_OUTPUT_CHAR(out, olen, ol++, '\n');
                } else if (h == USX_NUM && v == 26) {
                    // repeat the last output byte (count + 4) more times
                    int32_t count = readCount(in, &bit_no, len);
                    if (count < 0)
                        break;
                    count += 4;
                    if (ol <= 0)
                        return 0; // invalid encoding
                    char rpt_c = out[ol - 1];
                    while (count--)
                        DEC_OUTPUT_CHAR(out, olen, ol++, rpt_c);
                } else if (h == USX_SYM && v > 24) {
                    if (usx_freq_seq == NULL)
                        break; // frequent sequence code but no table supplied: invalid for these parameters
                    v -= 25;
                    const int freqlen = (int)strlen(usx_freq_seq[v]);
                    const int left = olen - ol;
                    if (left <= 0)
                        return olen + 1;
                    // copy what fits, then report the overflow
                    memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen));
                    if (left < freqlen)
                        return olen + 1;
                    ol += freqlen;
                } else if (h == USX_NUM && v > 22 && v < 26) {
                    if (usx_freq_seq == NULL)
                        break; // frequent sequence code but no table supplied: invalid for these parameters
                    v -= (23 - 3);
                    const int freqlen = (int)strlen(usx_freq_seq[v]);
                    const int left = olen - ol;
                    if (left <= 0)
                        return olen + 1;
                    // copy what fits, then report the overflow
                    memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen));
                    if (left < freqlen)
                        return olen + 1;
                    ol += freqlen;
                } else
                    break; // Terminator
                if (dstate == USX_DELTA)
                    h = USX_DELTA;
                continue;
            }
        }
        if (dstate == USX_DELTA)
            h = USX_DELTA;
        DEC_OUTPUT_CHAR(out, olen, ol++, c);
    }

    return ol;
}

/// Main API function (documented in unishox2.h). \n
/// Decompresses a single buffer by forwarding every argument unchanged to \n
/// unishox2_decompress_lines() with NULL as the previous-lines list.
int unishox2_decompress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const uint8_t usx_hcodes[],
                        const uint8_t usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[])
{
    const int decoded_len = unishox2_decompress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes,
                                                      usx_hcode_lens, usx_freq_seq, usx_templates, NULL);
    return decoded_len;
}

/// Main API function (documented in unishox2.h). \n
/// Convenience wrapper around unishox2_decompress() using the default preset (USX_PSET_DFLT). \n
/// The output limit handed down is INT_MAX - 1, so no practical bound on out is enforced here.
int unishox2_decompress_simple(const char *in, int len, char *out)
{
    const int decoded_len = unishox2_decompress(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_PSET_DFLT);
    return decoded_len;
}