inikep · tansy · Oct 10, 2023 · Oct 10, 2023 · Oct 10, 2023
diff --git a/Makefile b/Makefile
@@ -172,9 +172,9 @@ XZ_FILES = xz/common/alone_encoder.o xz/common/alone_decoder.o xz/check/crc32_t
 
 GIPFELI_FILES = gipfeli/decompress.o gipfeli/entropy.o gipfeli/entropy_code_builder.o gipfeli/gipfeli-internal.o gipfeli/lz77.o
 
-LIBDEFLATE_FILES = libdeflate/lib/adler32.o libdeflate/lib/utils.o libdeflate/lib/crc32.o libdeflate/lib/deflate_compress.o
-LIBDEFLATE_FILES += libdeflate/lib/deflate_decompress.o libdeflate/lib/gzip_compress.o libdeflate/lib/gzip_decompress.o
-LIBDEFLATE_FILES += libdeflate/lib/x86/cpu_features.o libdeflate/lib/arm/cpu_features.o libdeflate/lib/zlib_compress.o libdeflate/lib/zlib_decompress.o
 LIBDEFLATE_FILES = libdeflate/lib/adler32.o libdeflate/lib/crc32.o libdeflate/lib/deflate_compress.o libdeflate/lib/deflate_decompress.o
 LIBDEFLATE_FILES += libdeflate/lib/gzip_compress.o libdeflate/lib/gzip_decompress.o libdeflate/lib/utils.o libdeflate/lib/zlib_compress.o libdeflate/lib/zlib_decompress.o
 LIBDEFLATE_FILES += libdeflate/lib/x86/cpu_features.o libdeflate/lib/arm/cpu_features.o
 
 MISC_FILES = crush/crush.o shrinker/shrinker.o fastlz/fastlz.o pithy/pithy.o lzjb/lzjb2010.o wflz/wfLZ.o
 MISC_FILES += lzlib/lzlib.o blosclz/blosclz.o blosclz/fastcopy.o slz/slz.o

diff --git a/README.md b/README.md
@@ -1,10 +1,10 @@
 Introduction
 -------------------------
 
-lzbench is an in-memory benchmark of open-source LZ77/LZSS/LZMA compressors. It joins all compressors into a single exe. 
-At the beginning an input file is read to memory. 
-Then all compressors are used to compress and decompress the file and decompressed file is verified. 
-This approach has a big advantage of using the same compiler with the same optimizations for all compressors. 
 lzbench is an in-memory benchmark of open-source LZ77/LZSS/LZMA compressors. It joins all compressors into a single exe.
 At the beginning an input file is read to memory.
 Then all compressors are used to compress and decompress the file and decompressed file is verified.
 This approach has a big advantage of using the same compiler with the same optimizations for all compressors.
 The disadvantage is that it requires source code of each compressor (therefore Slug or lzturbo are not included).
 
 |Status |
@@ -64,7 +64,7 @@ make BUILD_ARCH=32-bit
 
 The default linking for Linux is dynamic and static for Windows. This can be changed with `make BUILD_STATIC=0/1`.
 
-To remove one of compressors you can add `-DBENCH_REMOVE_XXX` to `DEFINES` in Makefile (e.g. `DEFINES = -DBENCH_REMOVE_LZ4` to remove LZ4). 
 To remove one of the compressors you can add `-DBENCH_REMOVE_XXX` to `DEFINES` in Makefile (e.g. `DEFINES = -DBENCH_REMOVE_LZ4` to remove LZ4).
 You also have to remove corresponding `*.o` files (e.g. `lz4/lz4.o` and `lz4/lz4hc.o`).
 
 lzbench was tested with:
@@ -76,8 +76,8 @@ lzbench was tested with:
 
 Supported compressors
 -------------------------
-**Warning**: some of the compressors listed here have security issues and/or are 
-no longer maintained. For information about the security of the various compressors, 
 **Warning**: some of the compressors listed here have security issues and/or are
 no longer maintained. For information about the security of the various compressors,
 see the [CompFuzz Results](https://github.com/nemequ/compfuzz/wiki/Results) page.
 
  - [blosclz 2.0.0](https://github.com/Blosc/c-blosc2)
@@ -99,7 +99,7 @@ see the [CompFuzz Results](https://github.com/nemequ/compfuzz/wiki/Results) page
  - [lzg 1.0.10](https://liblzg.bitsnbites.eu/)
  - [lzham 1.0](https://github.com/richgel999/lzham_codec)
  - lzjb 2010
- - [lzlib 1.12-rc2](http://www.nongnu.org/lzip)
  - [lzlib 1.13](http://www.nongnu.org/lzip)
  - [lzma v19.00](http://7-zip.org)
  - [lzmat 1.01 v1.0](https://github.com/nemequ/lzmat) - WARNING: it contains bugs (decompression error; returns 0); it can throw SEGFAULT compiled with gcc 4.9 -O3
  - [lzo 2.10](http://www.oberhumer.com/opensource/lzo)

diff --git a/_lzbench/lzbench.h b/_lzbench/lzbench.h
@@ -18,11 +18,11 @@
 
 /* Larger of a and b. The whole expansion is parenthesized so the macro
    composes correctly with surrounding operators (e.g. MAX(x,y)+1).
    Note: each argument may be evaluated twice; avoid side effects. */
 #define MAX(a,b) (((a)>(b))?(a):(b))
 #ifndef MIN
- #define MIN(a,b) ((a)<(b)?(a):(b))
  #define MIN(a,b) ((a)<(b)?(a):(b))
 #endif
 
 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(WIN64) || defined(_WIN64)
- #define WINDOWS
  #define WINDOWS
 #endif
 
 /* **************************************
@@ -37,44 +37,44 @@
 #endif
 
 #ifdef WINDOWS
- #include <windows.h>
- typedef LARGE_INTEGER bench_rate_t;
- typedef LARGE_INTEGER bench_timer_t;
- #define InitTimer(rate) if (!QueryPerformanceFrequency(&rate)) { printf("QueryPerformance not present"); };
- #define GetTime(now) QueryPerformanceCounter(&now); 
- #define GetDiffTime(rate, start_ticks, end_ticks) (1000000000ULL*(end_ticks.QuadPart - start_ticks.QuadPart)/rate.QuadPart)
- void uni_sleep(UINT milisec) { Sleep(milisec); };
  #include <windows.h>
  typedef LARGE_INTEGER bench_rate_t;
  typedef LARGE_INTEGER bench_timer_t;
  #define InitTimer(rate) if (!QueryPerformanceFrequency(&rate)) { printf("QueryPerformance not present"); };
  #define GetTime(now) QueryPerformanceCounter(&now);
  #define GetDiffTime(rate, start_ticks, end_ticks) (1000000000ULL*(end_ticks.QuadPart - start_ticks.QuadPart)/rate.QuadPart)
  void uni_sleep(UINT milisec) { Sleep(milisec); };
  #ifndef fseeko
- #ifdef _fseeki64
- #define fseeko _fseeki64 
  #ifdef _fseeki64
  #define fseeko _fseeki64
  #define ftello _ftelli64
- #else
- #define fseeko fseek 
  #else
  #define fseeko fseek
  #define ftello ftell
  #endif
- #endif
- #define PROGOS "Windows"
  #endif
  #define PROGOS "Windows"
 #else
  #include <stdarg.h> // va_args
- #include <time.h> 
- #include <unistd.h>
- #include <sys/resource.h>
- void uni_sleep(uint32_t milisec) { usleep(milisec * 1000); };
  #include <time.h>
  #include <unistd.h>
  #include <sys/resource.h>
  void uni_sleep(uint32_t milisec) { usleep(milisec * 1000); };
 #if defined(__APPLE__) || defined(__MACH__)
  #include <mach/mach_time.h>
- typedef mach_timebase_info_data_t bench_rate_t;
  typedef mach_timebase_info_data_t bench_rate_t;
  typedef uint64_t bench_timer_t;
- #define InitTimer(rate) mach_timebase_info(&rate);
- #define GetTime(now) now = mach_absolute_time();
- #define GetDiffTime(rate, start_ticks, end_ticks) ((end_ticks - start_ticks) * (uint64_t)rate.numer) / ((uint64_t)rate.denom)
- #define PROGOS "MacOS"
  #define InitTimer(rate) mach_timebase_info(&rate);
  #define GetTime(now) now = mach_absolute_time();
  #define GetDiffTime(rate, start_ticks, end_ticks) ((end_ticks - start_ticks) * (uint64_t)rate.numer) / ((uint64_t)rate.denom)
  #define PROGOS "MacOS"
 #else
- typedef struct timespec bench_rate_t;
  typedef struct timespec bench_rate_t;
  typedef struct timespec bench_timer_t;
- #define InitTimer(rate)
- #define GetTime(now) if (clock_gettime(CLOCK_MONOTONIC, &now) == -1 ){ printf("clock_gettime error"); };
- #define GetDiffTime(rate, start_ticks, end_ticks) (1000000000ULL*( end_ticks.tv_sec - start_ticks.tv_sec ) + ( end_ticks.tv_nsec - start_ticks.tv_nsec ))
- #define PROGOS "Linux"
  #define InitTimer(rate)
  #define GetTime(now) if (clock_gettime(CLOCK_MONOTONIC, &now) == -1 ){ printf("clock_gettime error"); };
  /* Elapsed nanoseconds between two samples that expose tv_sec/tv_nsec:
     the seconds difference scaled to ns plus the nanosecond difference.
     `rate` is accepted for signature parity but not used in this variant. */
  #define GetDiffTime(rate, start_ticks, end_ticks) (1000000000ULL*( end_ticks.tv_sec - start_ticks.tv_sec ) + ( end_ticks.tv_nsec - start_ticks.tv_nsec ))
  #define PROGOS "Linux"
 #endif
 #endif
 
@@ -154,7 +154,7 @@ static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] =
  { "fastlzma2", "1.0.1", 1, 10, 0, 0, lzbench_fastlzma2_compress, lzbench_fastlzma2_decompress, NULL, NULL },
  { "gipfeli", "2016-07-13", 0, 0, 0, 0, lzbench_gipfeli_compress, lzbench_gipfeli_decompress, NULL, NULL },
  { "glza", "0.8", 0, 0, 0, 0, lzbench_glza_compress, lzbench_glza_decompress, NULL, NULL },
- { "libdeflate", "1.9",  1, 12, 0, 0, lzbench_libdeflate_compress, lzbench_libdeflate_decompress, NULL, NULL },
  { "libdeflate", "1.19", 1, 12, 0, 0, lzbench_libdeflate_compress, lzbench_libdeflate_decompress, NULL, NULL },
  { "lz4", "1.9.3", 0, 0, 0, 0, lzbench_lz4_compress, lzbench_lz4_decompress, NULL, NULL },
  { "lz4fast", "1.9.3", 1, 99, 0, 0, lzbench_lz4fast_compress, lzbench_lz4_decompress, NULL, NULL },
  { "lz4hc", "1.9.3", 1, 12, 0, 0, lzbench_lz4hc_compress, lzbench_lz4_decompress, NULL, NULL },
@@ -165,7 +165,7 @@ static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] =
  { "lzham22", "1.0", 0, 4, 22, 0, lzbench_lzham_compress, lzbench_lzham_decompress, NULL, NULL },
  { "lzham24", "1.0", 0, 4, 24, 0, lzbench_lzham_compress, lzbench_lzham_decompress, NULL, NULL },
  { "lzjb", "2010", 0, 0, 0, 0, lzbench_lzjb_compress, lzbench_lzjb_decompress, NULL, NULL },
- { "lzlib", "1.12-rc2", 0, 9, 0, 0, lzbench_lzlib_compress, lzbench_lzlib_decompress, NULL, NULL },
  { "lzlib", "1.13",  0, 9, 0, 0, lzbench_lzlib_compress, lzbench_lzlib_decompress, NULL, NULL },
  { "lzma", "19.00", 0, 9, 0, 0, lzbench_lzma_compress, lzbench_lzma_decompress, NULL, NULL },
  { "lzmat", "1.01", 0, 0, 0, 0, lzbench_lzmat_compress, lzbench_lzmat_decompress, NULL, NULL }, // decompression error (returns 0) and SEGFAULT (?)
  { "lzo1", "2.10", 1, 1, 0, 0, lzbench_lzo1_compress, lzbench_lzo1_decompress, lzbench_lzo_init, lzbench_lzo_deinit },

diff --git a/libdeflate/NEWS.md b/libdeflate/NEWS.md
@@ -1,5 +1,199 @@
 # libdeflate release notes
 
 ## Version 1.19
 
 * Added new functions `libdeflate_alloc_compressor_ex()` and
  `libdeflate_alloc_decompressor_ex()`. These functions allow specifying a
  custom memory allocator on a per-compressor basis.
 
 * libdeflate now always generates Huffman codes with at least 2 codewords. This
  fixes a compatibility issue where Windows Explorer's ZIP unpacker could not
  decompress DEFLATE streams created by libdeflate. libdeflate's behavior was
  allowed by the DEFLATE RFC, but not all software was okay with it. In rare
  cases, compression ratios can be slightly reduced by this change.
 
 * Disabled the use of some compiler intrinsics on MSVC versions where they don't
  work correctly.
 
 * libdeflate can now compress up to the exact size of the output buffer.
 
 * Slightly improved compression performance at levels 1-9.
 
 * Improved the compression ratio of very short inputs.
 
 ## Version 1.18
 
 * Fixed a bug where the build type didn't default to "Release" when using
  CMake 3.10 or earlier.
 
 * Fixed a bug where some optimized code wasn't used when building with
  Clang 15 or later (x86), or with Clang 16 or later (aarch64).
 
 * Fixed build errors with some architecture and compiler combos:
  * aarch64 with Clang 16
  * armv6kz or armv7e-m with gcc
  * armhf with gcc (on Debian only)
 
 ## Version 1.17
 
 (Apologies for another release so soon after v1.16, but the bug fix listed below
 needed to go out.)
 
 * Fixed a bug introduced in v1.16 where compression at levels 10-12 would
  sometimes produce an output larger than the size that was returned by the
  corresponding `libdeflate_*_compress_bound()` function.
 
 * Converted the fuzzing scripts to use LLVM's libFuzzer and added them to the
  GitHub Actions workflow. (This would have detected the above bug.)
 
 * Further improved the support for direct compilation without using the official
  build system. The top-level source directory no longer needs to be added to
  the include path, and building the programs no longer requires that
  `_FILE_OFFSET_BITS` and `_POSIX_C_SOURCE` be defined on the command line.
 
 ## Version 1.16
 
 * Improved the compression ratio at levels 10-12 slightly, mainly levels 11-12.
  Some inputs (such as certain PNG files) see much improved compression ratios.
  As a trade-off, compressing at levels 11-12 is now about 5-20% slower.
 
 * For consistency with zlib, the decompressor now returns an error on some
  invalid inputs that were accepted before.
 
 * Fixed a build error on arm64 with gcc with certain target CPUs. (Fixes v1.12)
 
 * Fixed a build error on arm32 with gcc 10.1-10.3 and 11.1-11.2. (Fixes v1.15)
 
 * Fixed a build error on arm32 with gcc in soft float mode. (Fixes v1.15)
 
 * Fixed a build error in programs/gzip.c with uClibc. (Fixes v1.15)
 
 * Fixed the install target on Windows. (Fixes v1.15)
 
 ## Version 1.15
 
 * libdeflate now uses CMake instead of a plain Makefile.
 
 * Improved MSVC support. Enabled most architecture-specific code with MSVC,
  fixed building with clang in MSVC compatibility mode, and other improvements.
 
 * When libdeflate is built with MinGW, the static library and import library are
  now named using the MinGW convention (`*.a` and `*.dll.a`) instead of the
  Visual Studio convention. This affects the official Windows binaries.
 
 ## Version 1.14
 
 Significantly improved decompression performance on all platforms. Examples
 include (measuring DEFLATE only):
 
 | Platform | Speedup over v1.13 |
 |------------------------------------|--------------------|
 | x86_64 (Intel Comet Lake), gcc | 1.287x |
 | x86_64 (Intel Comet Lake), clang | 1.437x |
 | x86_64 (Intel Ice Lake), gcc | 1.332x |
 | x86_64 (Intel Ice Lake), clang | 1.296x |
 | x86_64 (Intel Sandy Bridge), gcc | 1.162x |
 | x86_64 (Intel Sandy Bridge), clang | 1.092x |
 | x86_64 (AMD Zen 2), gcc | 1.263x |
 | x86_64 (AMD Zen 2), clang | 1.259x |
 | i386 (Intel Comet Lake), gcc | 1.570x |
 | i386 (Intel Comet Lake), clang | 1.344x |
 | arm64 (Apple M1), clang | 1.306x |
 | arm64 (Cortex-A76), clang | 1.355x |
 | arm64 (Cortex-A55), clang | 1.190x |
 | arm32 (Cortex-A76), clang | 1.665x |
 | arm32 (Cortex-A55), clang | 1.283x |
 
 Thanks to Dougall Johnson (https://dougallj.wordpress.com/) for ideas for many
 of the improvements.
 
 ## Version 1.13
 
 * Changed the 32-bit Windows build of the library to use the default calling
  convention (cdecl) instead of stdcall, reverting a change from libdeflate 1.4.
 
 * Fixed a couple macOS compatibility issues with the gzip program.
 
 ## Version 1.12
 
 This release focuses on improving the performance of the CRC-32 and Adler-32
 checksum algorithms on x86 and ARM (both 32-bit and 64-bit).
 
 * Build updates:
 
  * Fixed building libdeflate on Apple platforms.
 
  * For Visual Studio builds, Visual Studio 2015 or later is now required.
 
 * CRC-32 algorithm updates:
 
  * Improved CRC-32 performance on short inputs on x86 and ARM.
 
  * Improved CRC-32 performance on Apple Silicon Macs by using a 12-way pmull
  implementation. Performance on large inputs on M1 is now about 67 GB/s,
  compared to 8 GB/s before, or 31 GB/s with the Apple-provided zlib.
 
  * Improved CRC-32 performance on some other ARM CPUs by reworking the code so
  that multiple crc32 instructions can be issued in parallel.
 
  * Improved CRC-32 performance on some x86 CPUs by increasing the stride length
  of the pclmul implementation.
 
 * Adler-32 algorithm updates:
 
  * Improved Adler-32 performance on some x86 CPUs by optimizing the AVX-2
  implementation. E.g., performance on Zen 1 improved from 19 to 30 GB/s, and
  on Ice Lake from 35 to 41 GB/s (if the AVX-512 implementation is excluded).
 
  * Removed the AVX-512 implementation of Adler-32 to avoid CPU frequency
  downclocking, and because the AVX-2 implementation was made faster.
 
  * Improved Adler-32 performance on some ARM CPUs by optimizing the NEON
  implementation. E.g., Apple M1 improved from about 36 to 52 GB/s.
 
 ## Version 1.11
 
 * Library updates:
 
  * Improved compression performance slightly.
 
  * Detect arm64 CPU features on Apple platforms, which should improve
  performance in some areas such as CRC-32 computation.
 
 * Program updates:
 
  * The included `gzip` and `gunzip` programs now support the `-q` option.
 
  * The included `gunzip` program now passes through non-gzip data when both
  the `-f` and `-c` options are used.
 
 * Build updates:
 
  * Avoided a build error on arm32 with certain gcc versions, by disabling
  building `crc32_arm()` as dynamically-dispatched code when needed.
 
  * Support building with the LLVM toolchain on Windows.
 
  * Disabled the use of the "stdcall" ABI in static library builds on Windows.
 
  * Use the correct `install_name` in macOS builds.
 
  * Support Haiku builds.
 
 ## Version 1.10
 
 * Added an additional check to the decompressor to make it quickly detect
  certain bad inputs and not try to generate an unbounded amount of output.
 
  Note: this was only a problem when decompressing with an unknown output size,
  which isn't the recommended use case of libdeflate. However,
  `libdeflate-gunzip` has to do this, and it would run out of memory as it would
  keep trying to allocate a larger output buffer.
 
 * Fixed a build error on Solaris.
 
 * Cleaned up a few things in the compression code.
 
 ## Version 1.9
 
 * Made many improvements to the compression algorithms, and rebalanced the
@@ -9,8 +203,8 @@
  ratio on data where short matches aren't useful, such as DNA sequencing
  data. This applies to all compression levels, but primarily to levels 1-9.
 
- * Levels 1 was made much faster, though it often compresses slightly worse
- than before (but still better than zlib).
  * Level 1 was made much faster, though it often compresses slightly worse than
  before (but still better than zlib).
 
  * Levels 8-9 were also made faster, though they often compress slightly worse
  than before (but still better than zlib). On some data, levels 8-9 are much