summaryrefslogtreecommitdiff
path: root/lib/zstd/huf_compress.c
diff options
context:
space:
mode:
authorSrinivasarao P <spathi@codeaurora.org>2018-08-28 17:27:24 +0530
committerSrinivasarao P <spathi@codeaurora.org>2018-08-28 17:28:39 +0530
commitb87d31674ac9e8203f3a30b53f6a697bf5cf36b6 (patch)
tree6b9b8869b7d60d93b4c78b5ab2cd03b3f4a5a2e6 /lib/zstd/huf_compress.c
parent40eed1f50062df731c0bf79af27c463b7d2003a1 (diff)
parent5e24b4e4d3724968a1c106e9dae091ef4dfc578e (diff)
Merge android-4.4.153 (5e24b4e) into msm-4.4
* refs/heads/tmp-5e24b4e Linux 4.4.153 ovl: warn instead of error if d_type is not supported ovl: Do d_type check only if work dir creation was successful ovl: Ensure upper filesystem supports d_type x86/mm: Fix use-after-free of ldt_struct x86/mm/pat: Fix L1TF stable backport for CPA ANDROID: x86_64_cuttlefish_defconfig: Enable lz4 compression for zram UPSTREAM: drivers/block/zram/zram_drv.c: fix bug storing backing_dev BACKPORT: zram: introduce zram memory tracking BACKPORT: zram: record accessed second BACKPORT: zram: mark incompressible page as ZRAM_HUGE UPSTREAM: zram: correct flag name of ZRAM_ACCESS UPSTREAM: zram: Delete gendisk before cleaning up the request queue UPSTREAM: drivers/block/zram/zram_drv.c: make zram_page_end_io() static BACKPORT: zram: set BDI_CAP_STABLE_WRITES once UPSTREAM: zram: fix null dereference of handle UPSTREAM: zram: add config and doc file for writeback feature BACKPORT: zram: read page from backing device BACKPORT: zram: write incompressible pages to backing device BACKPORT: zram: identify asynchronous IO's return value BACKPORT: zram: add free space management in backing device UPSTREAM: zram: add interface to specif backing device UPSTREAM: zram: rename zram_decompress_page to __zram_bvec_read UPSTREAM: zram: inline zram_compress UPSTREAM: zram: clean up duplicated codes in __zram_bvec_write Linux 4.4.152 reiserfs: fix broken xattr handling (heap corruption, bad retval) i2c: imx: Fix race condition in dma read PCI: pciehp: Fix use-after-free on unplug PCI: Skip MPS logic for Virtual Functions (VFs) PCI: hotplug: Don't leak pci_slot on registration failure parisc: Remove unnecessary barriers from spinlock.h bridge: Propagate vlan add failure to user packet: refine ring v3 block size test to hold one frame netfilter: conntrack: dccp: treat SYNC/SYNCACK as invalid if no prior state xfrm_user: prevent leaking 2 bytes of kernel memory parisc: Remove ordered stores from syscall.S ext4: fix spectre gadget in ext4_mb_regular_allocator() KVM: irqfd: fix race between EPOLLHUP and irq_bypass_register_consumer staging: android: ion: check for kref overflow tcp: identify cryptic messages as TCP seq # bugs net: qca_spi: Fix log level if probe fails net: qca_spi: Make sure the QCA7000 reset is triggered net: qca_spi: Avoid packet drop during initial sync net: usb: rtl8150: demote allmulti message to dev_dbg() net/ethernet/freescale/fman: fix cross-build error drm/nouveau/gem: off by one bugs in nouveau_gem_pushbuf_reloc_apply() tcp: remove DELAYED ACK events in DCTCP qlogic: check kstrtoul() for errors packet: reset network header if packet shorter than ll reserved space ixgbe: Be more careful when modifying MAC filters ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG controller ARM: 8780/1: ftrace: Only set kernel memory back to read-only after boot perf llvm-utils: Remove bashism from kernel include fetch script bnxt_en: Fix for system hang if request_irq fails drm/armada: fix colorkey mode property ieee802154: fakelb: switch from BUG_ON() to WARN_ON() on problem ieee802154: at86rf230: use __func__ macro for debug messages ieee802154: at86rf230: switch from BUG_ON() to WARN_ON() on problem ARM: pxa: irq: fix handling of ICMR registers in suspend/resume netfilter: x_tables: set module owner for icmp(6) matches smsc75xx: Add workaround for gigabit link up hardware errata. kasan: fix shadow_size calculation error in kasan_module_alloc tracing: Use __printf markup to silence compiler ARM: imx_v4_v5_defconfig: Select ULPI support ARM: imx_v6_v7_defconfig: Select ULPI support HID: wacom: Correct touch maximum XY of 2nd-gen Intuos m68k: fix "bad page state" oops on ColdFire boot bnx2x: Fix receiving tx-timeout in error or recovery state. drm/exynos: decon5433: Fix WINCONx reset value drm/exynos: decon5433: Fix per-plane global alpha for XRGB modes drm/exynos: gsc: Fix support for NV16/61, YUV420/YVU420 and YUV422 modes md/raid10: fix that replacement cannot complete recovery after reassemble dmaengine: k3dma: Off by one in k3_of_dma_simple_xlate() ARM: dts: da850: Fix interrups property for gpio selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs perf report powerpc: Fix crash if callchain is empty perf test session topology: Fix test on s390 usb: xhci: increase CRS timeout value ARM: dts: am437x: make edt-ft5x06 a wakeup source brcmfmac: stop watchdog before detach and free everything cxgb4: when disabling dcb set txq dcb priority to 0 Smack: Mark inode instant in smack_task_to_inode ipv6: mcast: fix unsolicited report interval after receiving querys locking/lockdep: Do not record IRQ state within lockdep code net: davinci_emac: match the mdio device against its compatible if possible ARC: Enable machine_desc->init_per_cpu for !CONFIG_SMP net: propagate dev_get_valid_name return code net: hamradio: use eth_broadcast_addr enic: initialize enic->rfs_h.lock in enic_probe qed: Add sanity check for SIMD fastpath handler. arm64: make secondary_start_kernel() notrace scsi: xen-scsifront: add error handling for xenbus_printf usb: gadget: dwc2: fix memory leak in gadget_init() usb: gadget: composite: fix delayed_status race condition when set_interface usb: dwc2: fix isoc split in transfer with no data ARM: dts: Cygnus: Fix I2C controller interrupt type selftests: sync: add config fragment for testing sync framework selftests: zram: return Kselftest Skip code for skipped tests selftests: user: return Kselftest Skip code for skipped tests selftests: static_keys: return Kselftest Skip code for skipped tests selftests: pstore: return Kselftest Skip code for skipped tests netfilter: ipv6: nf_defrag: reduce struct net memory waste ARC: Explicitly add -mmedium-calls to CFLAGS ANDROID: x86_64_cuttlefish_defconfig: Enable zram and zstd BACKPORT: crypto: zstd - Add zstd support UPSTREAM: zram: add zstd to the supported algorithms list UPSTREAM: lib: Add zstd modules UPSTREAM: lib: Add xxhash module UPSTREAM: zram: rework copy of compressor name in comp_algorithm_store() UPSTREAM: zram: constify attribute_group structures. UPSTREAM: zram: count same page write as page_stored UPSTREAM: zram: reduce load operation in page_same_filled UPSTREAM: zram: use zram_free_page instead of open-coded UPSTREAM: zram: introduce zram data accessor UPSTREAM: zram: remove zram_meta structure UPSTREAM: zram: use zram_slot_lock instead of raw bit_spin_lock op BACKPORT: zram: partial IO refactoring BACKPORT: zram: handle multiple pages attached bio's bvec UPSTREAM: zram: fix operator precedence to get offset BACKPORT: zram: extend zero pages to same element pages BACKPORT: zram: remove waitqueue for IO done UPSTREAM: zram: remove obsolete sysfs attrs UPSTREAM: zram: support BDI_CAP_STABLE_WRITES UPSTREAM: zram: revalidate disk under init_lock BACKPORT: mm: support anonymous stable page UPSTREAM: zram: use __GFP_MOVABLE for memory allocation UPSTREAM: zram: drop gfp_t from zcomp_strm_alloc() UPSTREAM: zram: add more compression algorithms UPSTREAM: zram: delete custom lzo/lz4 UPSTREAM: zram: cosmetic: cleanup documentation UPSTREAM: zram: use crypto api to check alg availability BACKPORT: zram: switch to crypto compress API UPSTREAM: zram: rename zstrm find-release functions UPSTREAM: zram: introduce per-device debug_stat sysfs node UPSTREAM: zram: remove max_comp_streams internals UPSTREAM: zram: user per-cpu compression streams BACKPORT: zsmalloc: require GFP in zs_malloc() UPSTREAM: zram/zcomp: do not zero out zcomp private pages UPSTREAM: zram: pass gfp from zcomp frontend to backend UPSTREAM: socket: close race condition between sock_close() and sockfs_setattr() ANDROID: Refresh x86_64_cuttlefish_defconfig Linux 4.4.151 isdn: Disable IIOCDBGVAR Bluetooth: avoid killing an already killed socket x86/mm: Simplify p[g4um]d_page() macros serial: 8250_dw: always set baud rate in dw8250_set_termios ACPI / PM: save NVS memory for ASUS 1025C laptop ACPI: save NVS memory for Lenovo G50-45 USB: option: add support for DW5821e USB: serial: sierra: fix potential deadlock at close ALSA: vxpocket: Fix invalid endian conversions ALSA: memalloc: Don't exceed over the requested size ALSA: hda: Correct Asrock B85M-ITX power_save blacklist entry ALSA: cs5535audio: Fix invalid endian conversion ALSA: virmidi: Fix too long output trigger loop ALSA: vx222: Fix invalid endian conversions ALSA: hda - Turn CX8200 into D3 as well upon reboot ALSA: hda - Sleep for 10ms after entering D3 on Conexant codecs net_sched: fix NULL pointer dereference when delete tcindex filter vsock: split dwork to avoid reinitializations net_sched: Fix missing res info when create new tc_index filter llc: use refcount_inc_not_zero() for llc_sap_find() l2tp: use sk_dst_check() to avoid race on sk->sk_dst_cache dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart() Conflicts: drivers/block/zram/zram_drv.c drivers/staging/android/ion/ion.c include/linux/swap.h mm/zsmalloc.c Change-Id: I1c437ac5133503a939d06d51ec778b65371df6d1 Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
Diffstat (limited to 'lib/zstd/huf_compress.c')
-rw-r--r--lib/zstd/huf_compress.c770
1 files changed, 770 insertions, 0 deletions
diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c
new file mode 100644
index 000000000000..40055a7016e6
--- /dev/null
+++ b/lib/zstd/huf_compress.c
@@ -0,0 +1,770 @@
+/*
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (C) 2013-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ */
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include "bitstream.h"
+#include "fse.h" /* header compression */
+#include "huf.h"
+#include <linux/kernel.h>
+#include <linux/string.h> /* memcpy, memset */
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) \
+ { \
+ enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
+ } /* use only *after* variable declarations */
+#define CHECK_V_F(e, f) \
+ size_t const e = f; \
+ if (ERR_isError(e)) \
+ return f
+#define CHECK_F(f) \
+ { \
+ CHECK_V_F(_var_err__, f); \
+ }
+
+/* **************************************************************
+* Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+/* *******************************************************
+* HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
+{
+ BYTE *const ostart = (BYTE *)dst;
+ BYTE *op = ostart;
+ BYTE *const oend = ostart + dstSize;
+
+ U32 maxSymbolValue = HUF_TABLELOG_MAX;
+ U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+ FSE_CTable *CTable;
+ U32 *count;
+ S16 *norm;
+ size_t spaceUsed32 = 0;
+
+ HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
+
+ CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
+ spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
+ count = (U32 *)workspace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_MAX + 1;
+ norm = (S16 *)((U32 *)workspace + spaceUsed32);
+ spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > workspaceSize)
+ return ERROR(tableLog_tooLarge);
+ workspace = (U32 *)workspace + spaceUsed32;
+ workspaceSize -= (spaceUsed32 << 2);
+
+ /* init conditions */
+ if (wtSize <= 1)
+ return 0; /* Not compressible */
+
+ /* Scan input and build symbol stats */
+ {
+ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
+ if (maxCount == wtSize)
+ return 1; /* only a single symbol in src : rle */
+ if (maxCount == 1)
+ return 0; /* each symbol present maximum once => not compressible */
+ }
+
+ tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+ CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
+
+ /* Write table description header */
+ {
+ CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
+ op += hSize;
+ }
+
+ /* Compress */
+ CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
+ {
+ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
+ if (cSize == 0)
+ return 0; /* not enough space for compressed data */
+ op += cSize;
+ }
+
+ return op - ostart;
+}
+
+struct HUF_CElt_s {
+ U16 val;
+ BYTE nbBits;
+}; /* typedef'd to HUF_CElt within "huf.h" */
+
+/*! HUF_writeCTable_wksp() :
+ `CTable` : Huffman tree to save, using huf representation.
+ @return : size of saved CTable */
+size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
+{
+ BYTE *op = (BYTE *)dst;
+ U32 n;
+
+ BYTE *bitsToWeight;
+ BYTE *huffWeight;
+ size_t spaceUsed32 = 0;
+
+ bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+ spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
+ huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+ spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > workspaceSize)
+ return ERROR(tableLog_tooLarge);
+ workspace = (U32 *)workspace + spaceUsed32;
+ workspaceSize -= (spaceUsed32 << 2);
+
+ /* check conditions */
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+ return ERROR(maxSymbolValue_tooLarge);
+
+ /* convert to weight */
+ bitsToWeight[0] = 0;
+ for (n = 1; n < huffLog + 1; n++)
+ bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+ for (n = 0; n < maxSymbolValue; n++)
+ huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+ /* attempt weights compression by FSE */
+ {
+ CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
+ if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
+ op[0] = (BYTE)hSize;
+ return hSize + 1;
+ }
+ }
+
+ /* write raw values as 4-bits (max : 15) */
+ if (maxSymbolValue > (256 - 128))
+ return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
+ if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
+ return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
+ op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
+ huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
+ for (n = 0; n < maxSymbolValue; n += 2)
+ op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
+ return ((maxSymbolValue + 1) / 2) + 1;
+}
+
+size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
+{
+ U32 *rankVal;
+ BYTE *huffWeight;
+ U32 tableLog = 0;
+ U32 nbSymbols = 0;
+ size_t readSize;
+ size_t spaceUsed32 = 0;
+
+ rankVal = (U32 *)workspace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+ huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
+ spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > workspaceSize)
+ return ERROR(tableLog_tooLarge);
+ workspace = (U32 *)workspace + spaceUsed32;
+ workspaceSize -= (spaceUsed32 << 2);
+
+ /* get symbol weights */
+ readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
+ if (ERR_isError(readSize))
+ return readSize;
+
+ /* check result */
+ if (tableLog > HUF_TABLELOG_MAX)
+ return ERROR(tableLog_tooLarge);
+ if (nbSymbols > maxSymbolValue + 1)
+ return ERROR(maxSymbolValue_tooSmall);
+
+ /* Prepare base value per rank */
+ {
+ U32 n, nextRankStart = 0;
+ for (n = 1; n <= tableLog; n++) {
+ U32 curr = nextRankStart;
+ nextRankStart += (rankVal[n] << (n - 1));
+ rankVal[n] = curr;
+ }
+ }
+
+ /* fill nbBits */
+ {
+ U32 n;
+ for (n = 0; n < nbSymbols; n++) {
+ const U32 w = huffWeight[n];
+ CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+ }
+ }
+
+ /* fill val */
+ {
+ U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
+ U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
+ {
+ U32 n;
+ for (n = 0; n < nbSymbols; n++)
+ nbPerRank[CTable[n].nbBits]++;
+ }
+ /* determine stating value per rank */
+ valPerRank[tableLog + 1] = 0; /* for w==0 */
+ {
+ U16 min = 0;
+ U32 n;
+ for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
+ valPerRank[n] = min; /* get starting value within each rank */
+ min += nbPerRank[n];
+ min >>= 1;
+ }
+ }
+ /* assign value within rank, symbol order */
+ {
+ U32 n;
+ for (n = 0; n <= maxSymbolValue; n++)
+ CTable[n].val = valPerRank[CTable[n].nbBits]++;
+ }
+ }
+
+ return readSize;
+}
+
+typedef struct nodeElt_s {
+ U32 count;
+ U16 parent;
+ BYTE byte;
+ BYTE nbBits;
+} nodeElt;
+
+static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+ const U32 largestBits = huffNode[lastNonNull].nbBits;
+ if (largestBits <= maxNbBits)
+ return largestBits; /* early exit : no elt > maxNbBits */
+
+ /* there are several too large elements (at least >= 2) */
+ {
+ int totalCost = 0;
+ const U32 baseCost = 1 << (largestBits - maxNbBits);
+ U32 n = lastNonNull;
+
+ while (huffNode[n].nbBits > maxNbBits) {
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+ huffNode[n].nbBits = (BYTE)maxNbBits;
+ n--;
+ } /* n stops at huffNode[n].nbBits <= maxNbBits */
+ while (huffNode[n].nbBits == maxNbBits)
+ n--; /* n end at index of smallest symbol using < maxNbBits */
+
+ /* renorm totalCost */
+ totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
+
+ /* repay normalized cost */
+ {
+ U32 const noSymbol = 0xF0F0F0F0;
+ U32 rankLast[HUF_TABLELOG_MAX + 2];
+ int pos;
+
+ /* Get pos of last (smallest) symbol per rank */
+ memset(rankLast, 0xF0, sizeof(rankLast));
+ {
+ U32 currNbBits = maxNbBits;
+ for (pos = n; pos >= 0; pos--) {
+ if (huffNode[pos].nbBits >= currNbBits)
+ continue;
+ currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
+ rankLast[maxNbBits - currNbBits] = pos;
+ }
+ }
+
+ while (totalCost > 0) {
+ U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+ for (; nBitsToDecrease > 1; nBitsToDecrease--) {
+ U32 highPos = rankLast[nBitsToDecrease];
+ U32 lowPos = rankLast[nBitsToDecrease - 1];
+ if (highPos == noSymbol)
+ continue;
+ if (lowPos == noSymbol)
+ break;
+ {
+ U32 const highTotal = huffNode[highPos].count;
+ U32 const lowTotal = 2 * huffNode[lowPos].count;
+ if (highTotal <= lowTotal)
+ break;
+ }
+ }
+ /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+ /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+ while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+ nBitsToDecrease++;
+ totalCost -= 1 << (nBitsToDecrease - 1);
+ if (rankLast[nBitsToDecrease - 1] == noSymbol)
+ rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
+ if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
+ rankLast[nBitsToDecrease] = noSymbol;
+ else {
+ rankLast[nBitsToDecrease]--;
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
+ rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
+ }
+ } /* while (totalCost > 0) */
+
+ while (totalCost < 0) { /* Sometimes, cost correction overshoot */
+ if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
+ (using maxNbBits) */
+ while (huffNode[n].nbBits == maxNbBits)
+ n--;
+ huffNode[n + 1].nbBits--;
+ rankLast[1] = n + 1;
+ totalCost++;
+ continue;
+ }
+ huffNode[rankLast[1] + 1].nbBits--;
+ rankLast[1]++;
+ totalCost++;
+ }
+ }
+ } /* there are several too large elements (at least >= 2) */
+
+ return maxNbBits;
+}
+
+typedef struct {
+ U32 base;
+ U32 curr;
+} rankPos;
+
+static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
+{
+ rankPos rank[32];
+ U32 n;
+
+ memset(rank, 0, sizeof(rank));
+ for (n = 0; n <= maxSymbolValue; n++) {
+ U32 r = BIT_highbit32(count[n] + 1);
+ rank[r].base++;
+ }
+ for (n = 30; n > 0; n--)
+ rank[n - 1].base += rank[n].base;
+ for (n = 0; n < 32; n++)
+ rank[n].curr = rank[n].base;
+ for (n = 0; n <= maxSymbolValue; n++) {
+ U32 const c = count[n];
+ U32 const r = BIT_highbit32(c + 1) + 1;
+ U32 pos = rank[r].curr++;
+ while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
+ huffNode[pos] = huffNode[pos - 1], pos--;
+ huffNode[pos].count = c;
+ huffNode[pos].byte = (BYTE)n;
+ }
+}
+
+/** HUF_buildCTable_wksp() :
+ * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
+typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
+size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
+{
+ nodeElt *const huffNode0 = (nodeElt *)workSpace;
+ nodeElt *const huffNode = huffNode0 + 1;
+ U32 n, nonNullRank;
+ int lowS, lowN;
+ U16 nodeNb = STARTNODE;
+ U32 nodeRoot;
+
+ /* safety checks */
+ if (wkspSize < sizeof(huffNodeTable))
+ return ERROR(GENERIC); /* workSpace is not large enough */
+ if (maxNbBits == 0)
+ maxNbBits = HUF_TABLELOG_DEFAULT;
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+ return ERROR(GENERIC);
+ memset(huffNode0, 0, sizeof(huffNodeTable));
+
+ /* sort, decreasing order */
+ HUF_sort(huffNode, count, maxSymbolValue);
+
+ /* init for parents */
+ nonNullRank = maxSymbolValue;
+ while (huffNode[nonNullRank].count == 0)
+ nonNullRank--;
+ lowS = nonNullRank;
+ nodeRoot = nodeNb + lowS - 1;
+ lowN = nodeNb;
+ huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
+ huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
+ nodeNb++;
+ lowS -= 2;
+ for (n = nodeNb; n <= nodeRoot; n++)
+ huffNode[n].count = (U32)(1U << 30);
+ huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
+
+ /* create parents */
+ while (nodeNb <= nodeRoot) {
+ U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+ huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+ nodeNb++;
+ }
+
+ /* distribute weights (unlimited tree height) */
+ huffNode[nodeRoot].nbBits = 0;
+ for (n = nodeRoot - 1; n >= STARTNODE; n--)
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
+ for (n = 0; n <= nonNullRank; n++)
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
+
+ /* enforce maxTableLog */
+ maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+
+ /* fill result into tree (val, nbBits) */
+ {
+ U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
+ U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
+ if (maxNbBits > HUF_TABLELOG_MAX)
+ return ERROR(GENERIC); /* check fit into table */
+ for (n = 0; n <= nonNullRank; n++)
+ nbPerRank[huffNode[n].nbBits]++;
+ /* determine stating value per rank */
+ {
+ U16 min = 0;
+ for (n = maxNbBits; n > 0; n--) {
+ valPerRank[n] = min; /* get starting value within each rank */
+ min += nbPerRank[n];
+ min >>= 1;
+ }
+ }
+ for (n = 0; n <= maxSymbolValue; n++)
+ tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
+ for (n = 0; n <= maxSymbolValue; n++)
+ tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
+ }
+
+ return maxNbBits;
+}
+
+static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
+{
+ size_t nbBits = 0;
+ int s;
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
+ nbBits += CTable[s].nbBits * count[s];
+ }
+ return nbBits >> 3;
+}
+
+static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
+{
+ int bad = 0;
+ int s;
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
+ bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+ }
+ return !bad;
+}
+
+static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
+{
+ BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+#define HUF_FLUSHBITS(s) BIT_flushBits(s)
+
+#define HUF_FLUSHBITS_1(stream) \
+ if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
+ HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+ if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
+ HUF_FLUSHBITS(stream)
+
+size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
+{
+ const BYTE *ip = (const BYTE *)src;
+ BYTE *const ostart = (BYTE *)dst;
+ BYTE *const oend = ostart + dstSize;
+ BYTE *op = ostart;
+ size_t n;
+ BIT_CStream_t bitC;
+
+ /* init */
+ if (dstSize < 8)
+ return 0; /* not enough space to compress */
+ {
+ size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
+ if (HUF_isError(initErr))
+ return 0;
+ }
+
+ n = srcSize & ~3; /* join to mod 4 */
+ switch (srcSize & 3) {
+ case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
+ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
+ case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
+ case 0:
+ default:;
+ }
+
+ for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
+ HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
+ HUF_FLUSHBITS_1(&bitC);
+ HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
+ HUF_FLUSHBITS_2(&bitC);
+ HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
+ HUF_FLUSHBITS_1(&bitC);
+ HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
+ HUF_FLUSHBITS(&bitC);
+ }
+
+ return BIT_closeCStream(&bitC);
+}
+
+size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
+{
+ size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
+ const BYTE *ip = (const BYTE *)src;
+ const BYTE *const iend = ip + srcSize;
+ BYTE *const ostart = (BYTE *)dst;
+ BYTE *const oend = ostart + dstSize;
+ BYTE *op = ostart;
+
+ if (dstSize < 6 + 1 + 1 + 1 + 8)
+ return 0; /* minimum space to compress successfully */
+ if (srcSize < 12)
+ return 0; /* no saving possible : too small input */
+ op += 6; /* jumpTable */
+
+ {
+ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+ if (cSize == 0)
+ return 0;
+ ZSTD_writeLE16(ostart, (U16)cSize);
+ op += cSize;
+ }
+
+ ip += segmentSize;
+ {
+ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+ if (cSize == 0)
+ return 0;
+ ZSTD_writeLE16(ostart + 2, (U16)cSize);
+ op += cSize;
+ }
+
+ ip += segmentSize;
+ {
+ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
+ if (cSize == 0)
+ return 0;
+ ZSTD_writeLE16(ostart + 4, (U16)cSize);
+ op += cSize;
+ }
+
+ ip += segmentSize;
+ {
+ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
+ if (cSize == 0)
+ return 0;
+ op += cSize;
+ }
+
+ return op - ostart;
+}
+
+static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
+ const HUF_CElt *CTable)
+{
+ size_t const cSize =
+ singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+ if (HUF_isError(cSize)) {
+ return cSize;
+ }
+ if (cSize == 0) {
+ return 0;
+ } /* uncompressible */
+ op += cSize;
+ /* check compressibility */
+ if ((size_t)(op - ostart) >= srcSize - 1) {
+ return 0;
+ }
+ return op - ostart;
+}
+
+/* `workSpace` must a table of at least 1024 unsigned */
+static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
+ unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
+{
+ BYTE *const ostart = (BYTE *)dst;
+ BYTE *const oend = ostart + dstSize;
+ BYTE *op = ostart;
+
+ U32 *count;
+ size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
+ HUF_CElt *CTable;
+ size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
+
+ /* checks & inits */
+ if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
+ return ERROR(GENERIC);
+ if (!srcSize)
+ return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
+ if (!dstSize)
+ return 0; /* cannot fit within dst budget */
+ if (srcSize > HUF_BLOCKSIZE_MAX)
+ return ERROR(srcSize_wrong); /* curr block size limit */
+ if (huffLog > HUF_TABLELOG_MAX)
+ return ERROR(tableLog_tooLarge);
+ if (!maxSymbolValue)
+ maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+ if (!huffLog)
+ huffLog = HUF_TABLELOG_DEFAULT;
+
+ count = (U32 *)workSpace;
+ workSpace = (BYTE *)workSpace + countSize;
+ wkspSize -= countSize;
+ CTable = (HUF_CElt *)workSpace;
+ workSpace = (BYTE *)workSpace + CTableSize;
+ wkspSize -= CTableSize;
+
+ /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+ if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ }
+
+ /* Scan input and build symbol stats */
+ {
+ CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
+ if (largest == srcSize) {
+ *ostart = ((const BYTE *)src)[0];
+ return 1;
+ } /* single symbol, rle */
+ if (largest <= (srcSize >> 7) + 1)
+ return 0; /* Fast heuristic : not compressible enough */
+ }
+
+ /* Check validity of previous table */
+ if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+ *repeat = HUF_repeat_none;
+ }
+ /* Heuristic : use existing table for small inputs */
+ if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ }
+
+ /* Build Huffman Tree */
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+ {
+ CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
+ huffLog = (U32)maxBits;
+ /* Zero the unused symbols so we can check it for validity */
+ memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
+ }
+
+ /* Write table description header */
+ {
+ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
+ /* Check if using the previous table will be beneficial */
+ if (repeat && *repeat != HUF_repeat_none) {
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
+ size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+ if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ }
+ }
+ /* Use the new table */
+ if (hSize + 12ul >= srcSize) {
+ return 0;
+ }
+ op += hSize;
+ if (repeat) {
+ *repeat = HUF_repeat_none;
+ }
+ if (oldHufTable) {
+ memcpy(oldHufTable, CTable, CTableSize);
+ } /* Save the new table */
+ }
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
+}
+
+size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+ size_t wkspSize)
+{
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
+{
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
+ preferRepeat);
+}
+
+size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+ size_t wkspSize)
+{
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
+ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
+{
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
+ preferRepeat);
+}