diff options
Diffstat (limited to 'circuitpython/supervisor/shared/translate.h')
-rw-r--r-- | circuitpython/supervisor/shared/translate.h | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/circuitpython/supervisor/shared/translate.h b/circuitpython/supervisor/shared/translate.h new file mode 100644 index 0000000..da58e1e --- /dev/null +++ b/circuitpython/supervisor/shared/translate.h @@ -0,0 +1,93 @@ +/* + * This file is part of the MicroPython project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2018 Scott Shawcroft for Adafruit Industries + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H +#define MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H + +#include <stdint.h> + +// The format of the compressed data is: +// - the size of the uncompressed string in UTF-8 bytes, encoded as a +// (compress_max_length_bits)-bit number. compress_max_length_bits is +// computed during dictionary generation time, and happens to be 8 +// for all current platforms. However, it'll probably end up being +// 9 in some translations sometime in the future. This length excludes +// the trailing NUL, though notably decompress_length includes it. +// +// - followed by the huffman encoding of the individual UTF-16 code +// points that make up the string. The trailing "\0" is not +// represented by a huffman code, but is implied by the length. +// (building the huffman encoding on UTF-16 code points gave better +// compression than building it on UTF-8 bytes) +// +// - code points starting at 128 (word_start) and potentially extending +// to 255 (word_end) (but never interfering with the target +// language's used code points) stand for dictionary entries in a +// dictionary with size up to 256 code points. The dictionary entries +// are computed with a heuristic based on frequent substrings of 2 to +// 9 code points. These are called "words" but are not, grammatically +// speaking, words. They're just spans of code points that frequently +// occur together. They are ordered shortest to longest. +// +// - dictionary entries are non-overlapping, and the _ending_ index of each +// entry is stored in an array. A count of words of each length, from +// minlen to maxlen, is given in the array called wlencount. From +// this small array, the start and end of the N'th word can be +// calculated by an efficient, small loop. (A bit of time is traded +// to reduce the size of this table indicating lengths) +// +// The "data" / "tail" construct is so that the struct's last member is a +// "flexible array". However, the _only_ member is not permitted to be +// a flexible member, so we have to declare the first byte as a separte +// member of the structure. +// +// For translations where length needs 8 bits, this saves about 1.5 +// bytes per string on average compared to a structure of {uint16_t, +// flexible array}, but is also future-proofed against strings with +// UTF-8 length above 256, with a savings of about 1.375 bytes per +// string. +typedef struct compressed_string { + uint8_t data; + const uint8_t tail[]; +} compressed_string_t; + +// Return the compressed, translated version of a source string +// Usually, due to LTO, this is optimized into a load of a constant +// pointer. +const compressed_string_t *translate(const char *c); +void serial_write_compressed(const compressed_string_t *compressed); +char *decompress(const compressed_string_t *compressed, char *decompressed); +uint16_t decompress_length(const compressed_string_t *compressed); + + +// Map MicroPython's error messages to our translations. +#if defined(MICROPY_ENABLE_DYNRUNTIME) && MICROPY_ENABLE_DYNRUNTIME +#define MP_ERROR_TEXT(x) (x) +#else +#define MP_ERROR_TEXT(x) translate(x) +#endif + +#endif // MICROPY_INCLUDED_SUPERVISOR_TRANSLATE_H |