Beginning of compression

This commit is contained in:
Jm Casler 2022-04-11 20:09:48 -07:00
parent 02066cc8b9
commit 478274aff1
3 changed files with 1660 additions and 12 deletions

View File

@ -1,12 +1,15 @@
#include "configuration.h"
#include "Router.h" #include "Router.h"
#include "Channels.h" #include "Channels.h"
#include "CryptoEngine.h" #include "CryptoEngine.h"
#include "NodeDB.h" #include "NodeDB.h"
#include "RTC.h" #include "RTC.h"
#include "configuration.h"
#include "main.h" #include "main.h"
#include "mesh-pb-constants.h" #include "mesh-pb-constants.h"
#include "modules/RoutingModule.h" #include "modules/RoutingModule.h"
extern "C" {
#include "mesh/compression/unishox2.h"
}
#if defined(HAS_WIFI) || defined(PORTDUINO) #if defined(HAS_WIFI) || defined(PORTDUINO)
#include "mqtt/MQTT.h" #include "mqtt/MQTT.h"
@ -210,12 +213,10 @@ ErrorCode Router::send(MeshPacket *p)
if (p->which_payloadVariant == MeshPacket_decoded_tag) { if (p->which_payloadVariant == MeshPacket_decoded_tag) {
ChannelIndex chIndex = p->channel; // keep as a local because we are about to change it ChannelIndex chIndex = p->channel; // keep as a local because we are about to change it
#if defined(HAS_WIFI) || defined(PORTDUINO) #if defined(HAS_WIFI) || defined(PORTDUINO)
//check if we should send decrypted packets to mqtt // check if we should send decrypted packets to mqtt
//truth table: // truth table:
/* mqtt_server mqtt_encryption_enabled should_encrypt /* mqtt_server mqtt_encryption_enabled should_encrypt
* not set 0 1 * not set 0 1
* not set 1 1 * not set 1 1
@ -232,7 +233,7 @@ ErrorCode Router::send(MeshPacket *p)
DEBUG_MSG("Should encrypt MQTT?: %d\n", shouldActuallyEncrypt); DEBUG_MSG("Should encrypt MQTT?: %d\n", shouldActuallyEncrypt);
//the packet is currently in a decrypted state. send it now if they want decrypted packets // the packet is currently in a decrypted state. send it now if they want decrypted packets
if (mqtt && !shouldActuallyEncrypt) if (mqtt && !shouldActuallyEncrypt)
mqtt->onSend(*p, chIndex); mqtt->onSend(*p, chIndex);
#endif #endif
@ -244,8 +245,8 @@ ErrorCode Router::send(MeshPacket *p)
} }
#if defined(HAS_WIFI) || defined(PORTDUINO) #if defined(HAS_WIFI) || defined(PORTDUINO)
//the packet is now encrypted. // the packet is now encrypted.
//check if we should send encrypted packets to mqtt // check if we should send encrypted packets to mqtt
if (mqtt && shouldActuallyEncrypt) if (mqtt && shouldActuallyEncrypt)
mqtt->onSend(*p, chIndex); mqtt->onSend(*p, chIndex);
#endif #endif
@ -276,7 +277,7 @@ bool perhapsDecode(MeshPacket *p)
if (p->which_payloadVariant == MeshPacket_decoded_tag) if (p->which_payloadVariant == MeshPacket_decoded_tag)
return true; // If packet was already decoded just return return true; // If packet was already decoded just return
//assert(p->which_payloadVariant == MeshPacket_encrypted_tag); // assert(p->which_payloadVariant == MeshPacket_encrypted_tag);
// Try to find a channel that works with this hash // Try to find a channel that works with this hash
for (ChannelIndex chIndex = 0; chIndex < channels.getNumChannels(); chIndex++) { for (ChannelIndex chIndex = 0; chIndex < channels.getNumChannels(); chIndex++) {
@ -324,6 +325,30 @@ Routing_Error perhapsEncode(MeshPacket *p)
size_t numbytes = pb_encode_to_bytes(bytes, sizeof(bytes), Data_fields, &p->decoded); size_t numbytes = pb_encode_to_bytes(bytes, sizeof(bytes), Data_fields, &p->decoded);
if (1) {
// int orig_len, compressed_len;
int compressed_len;
char compressed_out[100] = {0};
// char *orig = (char *)"Hiiiiiiii! :) How are you doing? I am doing well.";
// char *orig = (char *)"Meshtastic";
// orig_len = strlen(orig);
// compressed_len = unishox2_compress_simple(orig, orig_len, compressed_out);
compressed_len = unishox2_compress_simple((char *)bytes, numbytes, compressed_out);
Serial.println(compressed_len);
Serial.println(compressed_out);
//&p->decoded.portnum;
char decompressed_out[100] = {};
int decompressed_len;
decompressed_len = unishox2_decompress_simple(compressed_out, compressed_len, decompressed_out);
Serial.println(decompressed_len);
Serial.println(decompressed_out);
}
if (numbytes > MAX_RHPACKETLEN) if (numbytes > MAX_RHPACKETLEN)
return Routing_Error_TOO_LARGE; return Routing_Error_TOO_LARGE;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,278 @@
/*
* Copyright (C) 2020 Siara Logics (cc)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @author Arundale Ramanathan
*
*/
/**
* @file unishox2.h
* @author Arundale Ramanathan, James Z. M. Gao
* @brief API for Unishox2 Compression and Decompression
*
* This file describes each function of the Unishox2 API \n
* For finding out how this API can be used in your program, \n
* please see test_unishox2.c.
*/
#ifndef unishox2
#define unishox2
#define UNISHOX_VERSION "2.0" ///< Unicode spec version
/**
* Macro switch to enable/disable output buffer length parameter in low level api \n
* Disabled by default \n
* When this macro is defined, the all the API functions \n
* except the simple API functions accept an additional parameter olen \n
* that enables the developer to pass the size of the output buffer provided \n
* so that the api function may not write beyond that length. \n
* This can be disabled if the developer knows that the buffer provided is sufficient enough \n
* so no additional parameter is passed and the program is faster since additional check \n
* for output length is not performed at each step \n
* The simple api, i.e. unishox2_(de)compress_simple will always omit the buffer length
*/
#ifndef UNISHOX_API_WITH_OUTPUT_LEN
# define UNISHOX_API_WITH_OUTPUT_LEN 0
#endif
/// Upto 8 bits of initial magic bit sequence can be included. Bit count can be specified with UNISHOX_MAGIC_BIT_LEN
#ifndef UNISHOX_MAGIC_BITS
# define UNISHOX_MAGIC_BITS 0xFF
#endif
/// Desired length of Magic bits defined by UNISHOX_MAGIC_BITS
#ifdef UNISHOX_MAGIC_BIT_LEN
# if UNISHOX_MAGIC_BIT_LEN < 0 || 9 <= UNISHOX_MAGIC_BIT_LEN
# error "UNISHOX_MAGIC_BIT_LEN need between [0, 8)"
# endif
#else
# define UNISHOX_MAGIC_BIT_LEN 1
#endif
//enum {USX_ALPHA = 0, USX_SYM, USX_NUM, USX_DICT, USX_DELTA};
/// Default Horizontal codes. When composition of text is know beforehand, the other hcodes in this section can be used to achieve more compression.
#define USX_HCODES_DFLT (const unsigned char[]) {0x00, 0x40, 0x80, 0xC0, 0xE0}
/// Length of each default hcode
#define USX_HCODE_LENS_DFLT (const unsigned char[]) {2, 2, 2, 3, 3}
/// Horizontal codes preset for English Alphabet content only
#define USX_HCODES_ALPHA_ONLY (const unsigned char[]) {0x00, 0x00, 0x00, 0x00, 0x00}
/// Length of each Alpha only hcode
#define USX_HCODE_LENS_ALPHA_ONLY (const unsigned char[]) {0, 0, 0, 0, 0}
/// Horizontal codes preset for Alpha Numeric content only
#define USX_HCODES_ALPHA_NUM_ONLY (const unsigned char[]) {0x00, 0x00, 0x80, 0x00, 0x00}
/// Length of each Alpha numeric hcode
#define USX_HCODE_LENS_ALPHA_NUM_ONLY (const unsigned char[]) {1, 0, 1, 0, 0}
/// Horizontal codes preset for Alpha Numeric and Symbol content only
#define USX_HCODES_ALPHA_NUM_SYM_ONLY (const unsigned char[]) {0x00, 0x80, 0xC0, 0x00, 0x00}
/// Length of each Alpha numeric and symbol hcodes
#define USX_HCODE_LENS_ALPHA_NUM_SYM_ONLY (const unsigned char[]) {1, 2, 2, 0, 0}
/// Horizontal codes preset favouring Alphabet content
#define USX_HCODES_FAVOR_ALPHA (const unsigned char[]) {0x00, 0x80, 0xA0, 0xC0, 0xE0}
/// Length of each hcode favouring Alpha content
#define USX_HCODE_LENS_FAVOR_ALPHA (const unsigned char[]) {1, 3, 3, 3, 3}
/// Horizontal codes preset favouring repeating sequences
#define USX_HCODES_FAVOR_DICT (const unsigned char[]) {0x00, 0x40, 0xC0, 0x80, 0xE0}
/// Length of each hcode favouring repeating sequences
#define USX_HCODE_LENS_FAVOR_DICT (const unsigned char[]) {2, 2, 3, 2, 3}
/// Horizontal codes preset favouring symbols
#define USX_HCODES_FAVOR_SYM (const unsigned char[]) {0x80, 0x00, 0xA0, 0xC0, 0xE0}
/// Length of each hcode favouring symbols
#define USX_HCODE_LENS_FAVOR_SYM (const unsigned char[]) {3, 1, 3, 3, 3}
//#define USX_HCODES_FAVOR_UMLAUT {0x00, 0x40, 0xE0, 0xC0, 0x80}
//#define USX_HCODE_LENS_FAVOR_UMLAUT {2, 2, 3, 3, 2}
/// Horizontal codes preset favouring umlaut letters
#define USX_HCODES_FAVOR_UMLAUT (const unsigned char[]) {0x80, 0xA0, 0xC0, 0xE0, 0x00}
/// Length of each hcode favouring umlaut letters
#define USX_HCODE_LENS_FAVOR_UMLAUT (const unsigned char[]) {3, 3, 3, 3, 1}
/// Horizontal codes preset for no repeating sequences
#define USX_HCODES_NO_DICT (const unsigned char[]) {0x00, 0x40, 0x80, 0x00, 0xC0}
/// Length of each hcode for no repeating sequences
#define USX_HCODE_LENS_NO_DICT (const unsigned char[]) {2, 2, 2, 0, 2}
/// Horizontal codes preset for no Unicode characters
#define USX_HCODES_NO_UNI (const unsigned char[]) {0x00, 0x40, 0x80, 0xC0, 0x00}
/// Length of each hcode for no Unicode characters
#define USX_HCODE_LENS_NO_UNI (const unsigned char[]) {2, 2, 2, 2, 0}
/// Default frequently occuring sequences. When composition of text is know beforehand, the other sequences in this section can be used to achieve more compression.
#define USX_FREQ_SEQ_DFLT (const char *[]) {"\": \"", "\": ", "</", "=\"", "\":\"", "://"}
/// Frequently occuring sequences in text content
#define USX_FREQ_SEQ_TXT (const char *[]) {" the ", " and ", "tion", " with", "ing", "ment"}
/// Frequently occuring sequences in URL content
#define USX_FREQ_SEQ_URL (const char *[]) {"https://", "www.", ".com", "http://", ".org", ".net"}
/// Frequently occuring sequences in JSON content
#define USX_FREQ_SEQ_JSON (const char *[]) {"\": \"", "\": ", "\",", "}}}", "\":\"", "}}"}
/// Frequently occuring sequences in HTML content
#define USX_FREQ_SEQ_HTML (const char *[]) {"</", "=\"", "div", "href", "class", "<p>"}
/// Frequently occuring sequences in XML content
#define USX_FREQ_SEQ_XML (const char *[]) {"</", "=\"", "\">", "<?xml version=\"1.0\"", "xmlns:", "://"}
/// Commonly occuring templates (ISO Date/Time, ISO Date, US Phone number, ISO Time, Unused)
#define USX_TEMPLATES (const char *[]) {"tfff-of-tfTtf:rf:rf.fffZ", "tfff-of-tf", "(fff) fff-ffff", "tf:rf:rf", 0}
/// Default preset parameter set. When composition of text is know beforehand, the other parameter sets in this section can be used to achieve more compression.
#define USX_PSET_DFLT USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set for English Alphabet only content
#define USX_PSET_ALPHA_ONLY USX_HCODES_ALPHA_ONLY, USX_HCODE_LENS_ALPHA_ONLY, USX_FREQ_SEQ_TXT, USX_TEMPLATES
/// Preset parameter set for Alpha numeric content
#define USX_PSET_ALPHA_NUM_ONLY USX_HCODES_ALPHA_NUM_ONLY, USX_HCODE_LENS_ALPHA_NUM_ONLY, USX_FREQ_SEQ_TXT, USX_TEMPLATES
/// Preset parameter set for Alpha numeric and symbol content
#define USX_PSET_ALPHA_NUM_SYM_ONLY USX_HCODES_ALPHA_NUM_SYM_ONLY, USX_HCODE_LENS_ALPHA_NUM_SYM_ONLY, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set for Alpha numeric symbol content having predominantly text
#define USX_PSET_ALPHA_NUM_SYM_ONLY_TXT USX_HCODES_ALPHA_NUM_SYM_ONLY, USX_HCODE_LENS_ALPHA_NUM_SYM_ONLY, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set favouring Alphabet content
#define USX_PSET_FAVOR_ALPHA USX_HCODES_FAVOR_ALPHA, USX_HCODE_LENS_FAVOR_ALPHA, USX_FREQ_SEQ_TXT, USX_TEMPLATES
/// Preset parameter set favouring repeating sequences
#define USX_PSET_FAVOR_DICT USX_HCODES_FAVOR_DICT, USX_HCODE_LENS_FAVOR_DICT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set favouring symbols
#define USX_PSET_FAVOR_SYM USX_HCODES_FAVOR_SYM, USX_HCODE_LENS_FAVOR_SYM, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set favouring unlaut letters
#define USX_PSET_FAVOR_UMLAUT USX_HCODES_FAVOR_UMLAUT, USX_HCODE_LENS_FAVOR_UMLAUT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set for when there are no repeating sequences
#define USX_PSET_NO_DICT USX_HCODES_NO_DICT, USX_HCODE_LENS_NO_DICT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set for when there are no unicode symbols
#define USX_PSET_NO_UNI USX_HCODES_NO_UNI, USX_HCODE_LENS_NO_UNI, USX_FREQ_SEQ_DFLT, USX_TEMPLATES
/// Preset parameter set for when there are no unicode symbols favouring text
#define USX_PSET_NO_UNI_FAVOR_TEXT USX_HCODES_NO_UNI, USX_HCODE_LENS_NO_UNI, USX_FREQ_SEQ_TXT, USX_TEMPLATES
/// Preset parameter set favouring URL content
#define USX_PSET_URL USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_URL, USX_TEMPLATES
/// Preset parameter set favouring JSON content
#define USX_PSET_JSON USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_JSON, USX_TEMPLATES
/// Preset parameter set favouring JSON content having no Unicode symbols
#define USX_PSET_JSON_NO_UNI USX_HCODES_NO_UNI, USX_HCODE_LENS_NO_UNI, USX_FREQ_SEQ_JSON, USX_TEMPLATES
/// Preset parameter set favouring XML content
#define USX_PSET_XML USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_XML, USX_TEMPLATES
/// Preset parameter set favouring HTML content
#define USX_PSET_HTML USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_HTML, USX_TEMPLATES
/**
* This structure is used when a string array needs to be compressed.
* This is passed as a parameter to the unishox2_decompress_lines() function
*/
struct us_lnk_lst {
char *data;
struct us_lnk_lst *previous;
};
/**
* This macro is for internal use, but builds upon the macro UNISHOX_API_WITH_OUTPUT_LEN
* When the macro UNISHOX_API_WITH_OUTPUT_LEN is defined, the all the API functions
* except the simple API functions accept an additional parameter olen
* that enables the developer to pass the size of the output buffer provided
* so that the api function may not write beyond that length.
* This can be disabled if the developer knows that the buffer provided is sufficient enough
* so no additional parameter is passed and the program is faster since additional check
* for output length is not performed at each step
*/
#if defined(UNISHOX_API_WITH_OUTPUT_LEN) && UNISHOX_API_WITH_OUTPUT_LEN != 0
# define UNISHOX_API_OUT_AND_LEN(out, olen) out, olen
#else
# define UNISHOX_API_OUT_AND_LEN(out, olen) out
#endif
/**
* Simple API for compressing a string
* @param[in] in Input ASCII / UTF-8 string
* @param[in] len length in bytes
* @param[out] out output buffer - should be large enough to hold compressed output
*/
extern int unishox2_compress_simple(const char *in, int len, char *out);
/**
* Simple API for decompressing a string
* @param[in] in Input compressed bytes (output of unishox2_compress functions)
* @param[in] len length of 'in' in bytes
* @param[out] out output buffer for ASCII / UTF-8 string - should be large enough
*/
extern int unishox2_decompress_simple(const char *in, int len, char *out);
/**
* Comprehensive API for compressing a string
*
* Presets are available for the last four parameters so they can be passed as single parameter. \n
* See USX_PSET_* macros. Example call: \n
* unishox2_compress(in, len, out, olen, USX_PSET_ALPHA_ONLY);
*
* @param[in] in Input ASCII / UTF-8 string
* @param[in] len length in bytes
* @param[out] out output buffer - should be large enough to hold compressed output
* @param[in] olen length of 'out' buffer in bytes. Can be omitted if sufficient buffer is provided
* @param[in] usx_hcodes Horizontal codes (array of bytes). See macro section for samples.
* @param[in] usx_hcode_lens Length of each element in usx_hcodes array
* @param[in] usx_freq_seq Frequently occuring sequences. See USX_FREQ_SEQ_* macros for samples
* @param[in] usx_templates Templates of frequently occuring patterns. See USX_TEMPLATES macro.
*/
extern int unishox2_compress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen),
const unsigned char usx_hcodes[], const unsigned char usx_hcode_lens[],
const char *usx_freq_seq[], const char *usx_templates[]);
/**
* Comprehensive API for de-compressing a string
*
* Presets are available for the last four parameters so they can be passed as single parameter. \n
* See USX_PSET_* macros. Example call: \n
* unishox2_decompress(in, len, out, olen, USX_PSET_ALPHA_ONLY);
*
* @param[in] in Input compressed bytes (output of unishox2_compress functions)
* @param[in] len length of 'in' in bytes
* @param[out] out output buffer - should be large enough to hold de-compressed output
* @param[in] olen length of 'out' buffer in bytes. Can be omitted if sufficient buffer is provided
* @param[in] usx_hcodes Horizontal codes (array of bytes). See macro section for samples.
* @param[in] usx_hcode_lens Length of each element in usx_hcodes array
* @param[in] usx_freq_seq Frequently occuring sequences. See USX_FREQ_SEQ_* macros for samples
* @param[in] usx_templates Templates of frequently occuring patterns. See USX_TEMPLATES macro.
*/
extern int unishox2_decompress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen),
const unsigned char usx_hcodes[], const unsigned char usx_hcode_lens[],
const char *usx_freq_seq[], const char *usx_templates[]);
/**
* More Comprehensive API for compressing array of strings
*
* See unishox2_compress() function for parameter definitions. \n
* This function takes an additional parameter, i.e. 'prev_lines' - the usx_lnk_lst structure \n
* See -g parameter in test_unishox2.c to find out how this can be used. \n
* This function is used when an array of strings need to be compressed \n
* and stored in a compressed array of bytes for use as a constant in other programs \n
* where each element of the array can be decompressed and used at runtime.
*/
extern int unishox2_compress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen),
const unsigned char usx_hcodes[], const unsigned char usx_hcode_lens[],
const char *usx_freq_seq[], const char *usx_templates[],
struct us_lnk_lst *prev_lines);
/**
* More Comprehensive API for de-compressing array of strings \n
* This function is not be used in conjuction with unishox2_compress_lines()
*
* See unishox2_decompress() function for parameter definitions. \n
* Typically an array is compressed using unishox2_compress_lines() and \n
* a header (.h) file is generated using the resultant compressed array. \n
* This header file can be used in another program with another decompress \n
* routine which takes this compressed array as parameter and index to be \n
* decompressed.
*/
extern int unishox2_decompress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen),
const unsigned char usx_hcodes[], const unsigned char usx_hcode_lens[],
const char *usx_freq_seq[], const char *usx_templates[],
struct us_lnk_lst *prev_lines);
#endif