From 2149b1a8f6b763e558a1b99a933921ef271fba92 Mon Sep 17 00:00:00 2001 From: fbrosson Date: Thu, 17 May 2018 16:52:53 +0000 Subject: [PATCH 01/45] Add Haiku as a validated target. lz4 1.8.2 works fine on Haiku and passes all tests. --- Makefile | 2 +- lib/Makefile | 2 +- programs/Makefile | 2 +- tests/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 86613fd71..4afd03fd7 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) HOST_OS = POSIX .PHONY: install uninstall diff --git a/lib/Makefile b/lib/Makefile index abb6c075a..d2ce31d7a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,7 +123,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) .PHONY: listL120 listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) diff --git a/programs/Makefile b/programs/Makefile index 72bdcaac1..655efbc6e 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -109,7 +109,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, 
BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) unlz4: lz4 ln -s lz4 unlz4 diff --git a/tests/Makefile b/tests/Makefile index d2385614a..ac86c3e80 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -129,7 +129,7 @@ checkTag: checkTag.c $(LZ4DIR)/lz4.h #----------------------------------------------------------------------------- # validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) MD5:=md5sum ifneq (,$(filter $(shell uname), Darwin )) From c746a27e91784adf9957600206a5d35dcdad04b1 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 21 May 2018 21:36:49 -0400 Subject: [PATCH 02/45] Test Linking C-Compiled Library and C++-Compiled Tests --- .travis.yml | 4 ++++ Makefile | 8 ++++++++ circle.yml | 1 + lib/lz4.c | 15 +++++++++++++++ tests/fullbench.c | 18 +++++++++++++++++- 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0a876f925..de6875be7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,6 +49,10 @@ matrix: packages: - valgrind + - env: Ubu=14.04 Cmd='make ctocpptest' COMPILER=cc + dist: trusty + sudo: false + - env: Ubu=14.04 Cmd='make -C tests test-lz4c32 test-fullbench32 versionsTest' COMPILER=cc dist: trusty sudo: required diff --git a/Makefile b/Makefile index 4afd03fd7..5776b1610 100644 --- a/Makefile +++ b/Makefile @@ -172,6 +172,14 @@ gpptest gpptest32: clean CC=$(CC) $(MAKE) -C $(PRGDIR) all CFLAGS="$(CFLAGS)" CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)" +ctocpptest: LIBCC="$(CC)" +ctocpptest: TESTCC="$(CXX)" +ctocpptest: CFLAGS="" +ctocpptest: clean + CC=$(LIBCC) $(MAKE) -C $(LZ4DIR) CFLAGS="$(CFLAGS)" all + CC=$(LIBCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" lz4.o lz4hc.o lz4frame.o + CC=$(TESTCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" all + c_standards: clean CFLAGS="-std=c90 -Werror" $(MAKE) clean allmost CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost diff --git a/circle.yml b/circle.yml index fa3759069..1602e4977 100644 --- a/circle.yml +++ b/circle.yml @@ -11,6 +11,7 @@ test: - clang -v; make clangtest && make clean - g++ -v; make gpptest && make clean - gcc -v; make c_standards && make clean + - gcc -v; g++ -v; make ctocpptest && make clean - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - gcc-6 -v; CC=gcc-6 make c_standards && make clean diff --git a/lib/lz4.c b/lib/lz4.c index e51a3e0a4..aaa196db0 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -496,6 +496,21 @@ int 
LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } int LZ4_sizeofState() { return LZ4_STREAMSIZE; } +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +int LZ4_decompress_safe_extDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + /*-****************************** * Compression functions ********************************/ diff --git a/tests/fullbench.c b/tests/fullbench.c index c06e2301d..2818ea28b 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -220,8 +220,16 @@ static int local_LZ4_compress_fast_continue0(const char* in, char* out, int inSi } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + /* declare hidden function */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); +extern int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +#if defined (__cplusplus) +} +#endif static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize) { @@ -289,8 +297,16 @@ static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int in } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); +#if defined (__cplusplus) +} +#endif + static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize) { (void)inSize; From 91888f472d54ed90fb093f8c0a64e30896401846 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 21 May 2018 21:55:04 -0400 Subject: [PATCH 03/45] Remove #define-rename of `LZ4_decompress_safe_forceExtDict` --- lib/lz4.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index aaa196db0..ecd60fbe3 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -505,7 +505,7 @@ extern "C" { int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); -int LZ4_decompress_safe_extDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); +int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); #if defined (__cplusplus) } @@ -1660,10 +1660,10 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */ -#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict -int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize) +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, @@ -1788,8 +1788,8 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch /* The buffer wraps around, or they're switching to another buffer. 
*/ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; lz4sd->prefixEnd = (BYTE*)dest + result; @@ -1849,7 +1849,7 @@ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressed return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) From 4248a9bfc0a391438f18e62525568358253623c7 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 21 May 2018 21:43:54 -0400 Subject: [PATCH 04/45] Add `extern "C"` Guards Around Experimental HC Declarations --- lib/lz4hc.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/lz4hc.h b/lib/lz4hc.h index bb5e07373..970fa3966 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -246,6 +246,10 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API int LZ4_resetStr #ifndef LZ4_HC_SLO_098092834 #define LZ4_HC_SLO_098092834 +#if defined (__cplusplus) +extern "C" { +#endif + /*! LZ4_compress_HC_destSize() : v1.8.0 (experimental) * Will try to compress as much data from `src` as possible * that can fit into `targetDstSize` budget. 
@@ -343,5 +347,9 @@ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* ds */ LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream); +#if defined (__cplusplus) +} +#endif + #endif /* LZ4_HC_SLO_098092834 */ #endif /* LZ4_HC_STATIC_LINKING_ONLY */ From b8211544ef7f9c020fafb278cf7cf96f645a81ec Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 22 May 2018 11:28:39 -0400 Subject: [PATCH 05/45] Also Fix Appveyor Cast Warning --- lib/lz4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4.c b/lib/lz4.c index ecd60fbe3..05454fc09 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -714,7 +714,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( cctx->dictSize += (U32)inputSize; } cctx->currentOffset += (U32)inputSize; - cctx->tableType = tableType; + cctx->tableType = (U16)tableType; if (inputSize Date: Tue, 22 May 2018 20:38:20 +0200 Subject: [PATCH 06/45] allow to override uname when cross-compiling When cross-compiling for example from Darwin to Linux it might be useful to override uname output to force Linux and create Linux libraries instead of Darwin libraries. 
--- lib/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index d2ce31d7a..6b37839b8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,7 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT)) BUILD_SHARED:=yes BUILD_STATIC:=yes +OS ?= $(shell uname) CPPFLAGS+= -DXXH_NAMESPACE=LZ4_ CFLAGS ?= -O3 DEBUGFLAGS:= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ @@ -58,7 +59,7 @@ SRCFILES := $(sort $(wildcard *.c)) # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -ifeq ($(shell uname), Darwin) +ifeq ($(OS), Darwin) SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) @@ -142,14 +143,14 @@ libdir ?= $(LIBDIR) INCLUDEDIR ?= $(prefix)/include includedir ?= $(INCLUDEDIR) -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly)) +ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly)) PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig else PKGCONFIGDIR ?= $(libdir)/pkgconfig endif pkgconfigdir ?= $(PKGCONFIGDIR) -ifneq (,$(filter $(shell uname),SunOS)) +ifneq (,$(filter $(OS),SunOS)) INSTALL ?= ginstall else INSTALL ?= install From ebe7e356012e68dfdb45627976c77285db58cb9d Mon Sep 17 00:00:00 2001 From: Norm Green Date: Fri, 1 Jun 2018 11:52:22 -0700 Subject: [PATCH 07/45] Fix AIX 32 bit build problem of fuzzer.c --- tests/fuzzer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 5dd75b389..5a92f8f06 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -38,7 +38,7 @@ /*-************************************ * Dependencies **************************************/ -#ifdef __unix__ /* must be included before platform.h for MAP_ANONYMOUS */ +#if defined(__unix__) && !defined(_AIX) /* must be included before platform.h for MAP_ANONYMOUS */ # 
include /* mmap */ #endif #include "platform.h" /* _CRT_SECURE_NO_WARNINGS */ @@ -48,6 +48,10 @@ #include /* strcmp */ #include /* clock_t, clock, CLOCKS_PER_SEC */ #include +#if defined(__unix__) && defined(_AIX) +# include /* mmap */ +#endif + #define LZ4_STATIC_LINKING_ONLY #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" From 78978d655d0197f46a04ea4788a601d19afa9e7a Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Wed, 20 Jun 2018 13:37:49 -0700 Subject: [PATCH 08/45] set up sample .yml --- .circleci/config.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..65e6c33bb --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,13 @@ +version: 2 +jobs: + build: + docker: + - image: debian:stretch + steps: + - checkout + - run: + name: Greeting + command: echo Hello, world. + - run: + name: Print the Current Time + command: date From 536b79afd916d65c472a7157390ef621ee967771 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Mon, 25 Jun 2018 17:46:39 -0700 Subject: [PATCH 09/45] Added --fast command to cli --- programs/bench.c | 2 +- programs/lz4.1.md | 7 +++++++ programs/lz4cli.c | 35 ++++++++++++++++++++++++++++++++++- tests/Makefile | 6 ++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 770191cfa..c91ce7c3f 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -407,7 +407,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, if (g_displayLevel == 1 && !g_additionalParam) DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); - if (cLevelLast < cLevel) cLevelLast = cLevel; + // if (cLevelLast < cLevel) cLevelLast = cLevel; for (l=cLevel; l <= cLevelLast; l++) { BMK_benchMem(srcBuffer, benchedSize, diff --git a/programs/lz4.1.md 
b/programs/lz4.1.md index a5168e99d..d4eaf8aab 100644 --- a/programs/lz4.1.md +++ b/programs/lz4.1.md @@ -156,6 +156,13 @@ only the latest one will be applied. * `-BD`: Block Dependency (improves compression ratio on small blocks) +* `--fast[=#]`: + switch to ultra-fast compression levels. + If `=#` is not present, it defaults to `1`. + The higher the value, the faster the compression speed, at the cost of some compression ratio. + This setting overwrites compression level if one was set previously. + Similarly, if a compression level is set after `--fast`, it overrides it. + * `--[no-]frame-crc`: Select frame checksum (default:enabled) diff --git a/programs/lz4cli.c b/programs/lz4cli.c index ba519b462..94e3b14ba 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -141,6 +141,7 @@ static int usage_advanced(const char* exeName) DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n"); DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n"); DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n"); + DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "Benchmark arguments : \n"); DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); DISPLAY( " -e# : test all compression levels from -bX to # (default : 1)\n"); @@ -272,13 +273,28 @@ static unsigned readU32FromChar(const char** stringPtr) return result; } +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. 
+ */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench } operationMode_e; +#define CLEAN_RETURN(i) { operationResult = (i); goto _cleanup; } + int main(int argc, const char** argv) { int i, cLevel=1, - cLevelLast=1, + cLevelLast=-10000, legacy_format=0, forceStdout=0, main_pause=0, @@ -363,6 +379,23 @@ int main(int argc, const char** argv) if (!strcmp(argument, "--help")) { usage_advanced(exeName); goto _cleanup; } if (!strcmp(argument, "--keep")) { LZ4IO_setRemoveSrcFile(0); continue; } /* keep source file (default) */ if (!strcmp(argument, "--rm")) { LZ4IO_setRemoveSrcFile(1); continue; } + if (longCommandWArg(&argument, "--fast")) { + /* Parse optional window log */ + if (*argument == '=') { + U32 fastLevel; + ++argument; + fastLevel = readU32FromChar(&argument); + if (fastLevel) { + cLevel = -(int)fastLevel; + } + } else if (*argument != 0) { + /* Invalid character following --fast */ + CLEAN_RETURN(badusage(exeName)); + } else { + cLevel = -1; /* default for --fast */ + } + continue; + } } while (argument[1]!=0) { diff --git a/tests/Makefile b/tests/Makefile index ac86c3e80..a133df1df 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -262,8 +262,14 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum + ./datagen -g256KB > tmp-tlb-dg256k + test "$(shell lz4 -c --fast tmp-tlb-dg256k| wc -c)" -lt "$(shell lz4 -c --fast=9 tmp-tlb-dg256k| wc -c)" + test "$(shell lz4 -c --fast=1 tmp-tlb-dg256k| wc -c)" -eq "$(shell lz4 -c --fast tmp-tlb-dg256k| wc -c)" + test "$(shell lz4 -c -9 tmp-tlb-dg256k| wc -c)" -lt "$(shell lz4 -c --fast=1 
tmp-tlb-dg256k| wc -c)" @$(RM) tmp-tlb* + + test-lz4-dict: lz4 datagen @echo "\n ---- test lz4 compression/decompression with dictionary ----" ./datagen -g16KB > tmp-dict From 17bcb58eb0ca737c4d567159563ce950a0aef890 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Mon, 25 Jun 2018 18:51:47 -0700 Subject: [PATCH 10/45] Fixed bug about file to be compressed is not present --- .circleci/config.yml | 52 +++++++++++++++++++++++++++++++++----------- tests/Makefile | 7 +++--- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 65e6c33bb..1602e4977 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,13 +1,39 @@ -version: 2 -jobs: - build: - docker: - - image: debian:stretch - steps: - - checkout - - run: - name: Greeting - command: echo Hello, world. - - run: - name: Print the Current Time - command: date +dependencies: + override: + - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update + - sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu + - sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross + - sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind + +test: + override: + # Tests compilers and C standards + - clang -v; make clangtest && make clean + - g++ -v; make gpptest && make clean + - gcc -v; make c_standards && make clean + - gcc -v; g++ -v; make ctocpptest && make clean + - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean + - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - gcc-6 -v; CC=gcc-6 make c_standards && make clean + - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean +# Shorter tests + - make cmake && make clean + - make -C tests test-lz4 + - make -C tests test-lz4c + - make -C tests test-frametest + - make -C tests 
test-fullbench + - make -C tests test-fuzzer && make clean + - make -C lib all && make clean + - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean + - make travis-install && make clean + # Longer tests + - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - make usan && make clean + - clang -v; make staticAnalyze && make clean + # Valgrind tests + - make -C tests test-mem && make clean + # ARM, AArch64, PowerPC, PowerPC64 tests + - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean + - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean + - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean + - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean diff --git a/tests/Makefile b/tests/Makefile index a133df1df..0eba02d36 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -262,10 +262,9 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum - ./datagen -g256KB > tmp-tlb-dg256k - test "$(shell lz4 -c --fast tmp-tlb-dg256k| wc -c)" -lt "$(shell lz4 -c --fast=9 tmp-tlb-dg256k| wc -c)" - test "$(shell lz4 -c --fast=1 tmp-tlb-dg256k| wc -c)" -eq "$(shell lz4 -c --fast tmp-tlb-dg256k| wc -c)" - test "$(shell lz4 -c -9 tmp-tlb-dg256k| wc -c)" -lt "$(shell lz4 -c --fast=1 tmp-tlb-dg256k| wc -c)" + test "$(shell ./datagen -g20KB | lz4 -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | lz4 -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 + test "$(shell ./datagen -g20KB | lz4 -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| lz4 -c --fast| wc -c)" # checks default fast compression is -1 + test "$(shell ./datagen -g20KB | lz4 -c -9 | wc -c)" -lt 
"$(shell ./datagen -g20KB| lz4 -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 @$(RM) tmp-tlb* From db97528fe81d5cde0a6cd780d6d58451526087c4 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 10:33:15 -0700 Subject: [PATCH 11/45] fix yml file --- .circleci/{config.yml => circle.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .circleci/{config.yml => circle.yml} (100%) diff --git a/.circleci/config.yml b/.circleci/circle.yml similarity index 100% rename from .circleci/config.yml rename to .circleci/circle.yml From 9f8fd098523fb1d5ea791f1f905317c1e1348bf0 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 10:49:39 -0700 Subject: [PATCH 12/45] delete commented out code in bench.c --- programs/bench.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index c91ce7c3f..1ab1b92d3 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -407,8 +407,6 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, if (g_displayLevel == 1 && !g_additionalParam) DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); - // if (cLevelLast < cLevel) cLevelLast = cLevel; - for (l=cLevel; l <= cLevelLast; l++) { BMK_benchMem(srcBuffer, benchedSize, displayName, l, From 075952e34013a6b7edee0d3c75eecd62d07d7a20 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 11:15:55 -0700 Subject: [PATCH 13/45] Fixed lz4 not found error --- tests/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 0eba02d36..77220b55b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -262,9 +262,9 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test 
block checksum - test "$(shell ./datagen -g20KB | lz4 -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | lz4 -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 - test "$(shell ./datagen -g20KB | lz4 -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| lz4 -c --fast| wc -c)" # checks default fast compression is -1 - test "$(shell ./datagen -g20KB | lz4 -c -9 | wc -c)" -lt "$(shell ./datagen -g20KB| lz4 -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | lz4 -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| lz4 -c --fast| wc -c)" # checks default fast compression is -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c -9 | wc -c)" -lt "$(shell ./datagen -g20KB| lz4 -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 @$(RM) tmp-tlb* From e95706bb2afe50aab6e2d812e73420f1477cbfea Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 11:24:21 -0700 Subject: [PATCH 14/45] Fixed lz4 not found error part 2 --- tests/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 77220b55b..70e186441 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -262,9 +262,9 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum - test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | lz4 -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 - test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| lz4 -c --fast| wc -c)" # checks 
default fast compression is -1 - test "$(shell ./datagen -g20KB | $(LZ4) -c -9 | wc -c)" -lt "$(shell ./datagen -g20KB| lz4 -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c -9 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 @$(RM) tmp-tlb* From fd1d59f7ba5ed24d973b7569511b7162978d3e46 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 14:48:58 -0700 Subject: [PATCH 15/45] Removed duplicated circle.yml file --- .circleci/circle.yml | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 .circleci/circle.yml diff --git a/.circleci/circle.yml b/.circleci/circle.yml deleted file mode 100644 index 1602e4977..000000000 --- a/.circleci/circle.yml +++ /dev/null @@ -1,39 +0,0 @@ -dependencies: - override: - - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update - - sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu - - sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross - - sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind - -test: - override: - # Tests compilers and C standards - - clang -v; make clangtest && make clean - - g++ -v; make gpptest && make clean - - gcc -v; make c_standards && make clean - - gcc -v; g++ -v; make ctocpptest && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean - - gcc-5 -v; CC=gcc-5 
CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - gcc-6 -v; CC=gcc-6 make c_standards && make clean - - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean -# Shorter tests - - make cmake && make clean - - make -C tests test-lz4 - - make -C tests test-lz4c - - make -C tests test-frametest - - make -C tests test-fullbench - - make -C tests test-fuzzer && make clean - - make -C lib all && make clean - - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean - - make travis-install && make clean - # Longer tests - - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - make usan && make clean - - clang -v; make staticAnalyze && make clean - # Valgrind tests - - make -C tests test-mem && make clean - # ARM, AArch64, PowerPC, PowerPC64 tests - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean - - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean - - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean From 9ee5183d9bdfd2f39e8ae2e2a4efff25e61c226d Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 17:31:15 -0700 Subject: [PATCH 16/45] Fixed code based on comments from pull request --- programs/bench.c | 2 ++ programs/lz4cli.c | 9 +++++---- tests/Makefile | 9 +++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 1ab1b92d3..770191cfa 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -407,6 +407,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, if (g_displayLevel == 1 && !g_additionalParam) DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, 
g_nbSeconds, (U32)(g_blockSize>>10)); + if (cLevelLast < cLevel) cLevelLast = cLevel; + for (l=cLevel; l <= cLevelLast; l++) { BMK_benchMem(srcBuffer, benchedSize, displayName, l, diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 94e3b14ba..669b5854d 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -288,8 +288,6 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench } operationMode_e; -#define CLEAN_RETURN(i) { operationResult = (i); goto _cleanup; } - int main(int argc, const char** argv) { int i, @@ -380,7 +378,7 @@ int main(int argc, const char** argv) if (!strcmp(argument, "--keep")) { LZ4IO_setRemoveSrcFile(0); continue; } /* keep source file (default) */ if (!strcmp(argument, "--rm")) { LZ4IO_setRemoveSrcFile(1); continue; } if (longCommandWArg(&argument, "--fast")) { - /* Parse optional window log */ + /* Parse optional acceleration factor */ if (*argument == '=') { U32 fastLevel; ++argument; @@ -388,9 +386,12 @@ int main(int argc, const char** argv) if (fastLevel) { cLevel = -(int)fastLevel; } + else { + badusage(exeName); + } } else if (*argument != 0) { /* Invalid character following --fast */ - CLEAN_RETURN(badusage(exeName)); + badusage(exeName); } else { cLevel = -1; /* default for --fast */ } diff --git a/tests/Makefile b/tests/Makefile index 70e186441..abaf6480f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -262,9 +262,14 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum - test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # compressed size of compression level -1 should be lower than -9 + # ./datagen -g20KB generates the same file every single time + # cannot save output of ./datagen -g20KB as input 
file to lz4 because the following shell commands are run before ./datagen -g20KB + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9 test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 - test "$(shell ./datagen -g20KB | $(LZ4) -c -9 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # compressed size of compression level 9 should be lower than -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c -3 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 3 vs -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=2 | wc -c)" # 1 vs -2 + test "$(shell $(LZ4) -c --fast=0 tmp-tlb-dg20k | wc -c)" -eq 0 # lz4 should fail when fast=0 + test "$(shell $(LZ4) -c --fast=-1 tmp-tlb-dg20k | wc -c)" -eq 0 # lz4 should fail when fast=-1 @$(RM) tmp-tlb* From 8745638d7c04bdfc33bcf656ed262c2588534db5 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Tue, 26 Jun 2018 17:53:12 -0700 Subject: [PATCH 17/45] Fixed invalid argument test and reformatted else --- programs/lz4cli.c | 3 +-- tests/Makefile | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 669b5854d..dc60b00bb 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -385,8 +385,7 @@ int main(int argc, const char** argv) fastLevel = readU32FromChar(&argument); if (fastLevel) { cLevel = -(int)fastLevel; - } - else { + } else { badusage(exeName); } } else if (*argument != 0) { diff --git a/tests/Makefile b/tests/Makefile index abaf6480f..0e7924a89 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -268,8 +268,8 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast 
compression is -1 test "$(shell ./datagen -g20KB | $(LZ4) -c -3 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 3 vs -1 test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=2 | wc -c)" # 1 vs -2 - test "$(shell $(LZ4) -c --fast=0 tmp-tlb-dg20k | wc -c)" -eq 0 # lz4 should fail when fast=0 - test "$(shell $(LZ4) -c --fast=-1 tmp-tlb-dg20k | wc -c)" -eq 0 # lz4 should fail when fast=-1 + ! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0 + ! $(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1 @$(RM) tmp-tlb* From e778db373b67ce15311b783c49b1d054293ce2af Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Wed, 27 Jun 2018 13:36:38 -0700 Subject: [PATCH 18/45] Fixed bugs about incorrect acceleration calculation and benchmarking negative compresion level --- lib/lz4frame.c | 4 ++-- programs/bench.c | 5 ++++- tests/Makefile | 3 +-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index e1d0b1d02..08bf0faee 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -738,7 +738,7 @@ static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize, static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? -level + 1 : 1; LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent); if (cdict) { return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); @@ -749,7 +749,7 @@ static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? 
-level + 1 : 1; (void)cdict; /* init once at beginning of frame */ return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } diff --git a/programs/bench.c b/programs/bench.c index 770191cfa..9dc31c4ce 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -49,7 +49,10 @@ #include "lz4.h" #define COMPRESSOR0 LZ4_compress_local -static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); } +static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { + int const acceleration = (clevel < 0) ? -clevel + 1 : 1; + return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); +} #include "lz4hc.h" #define COMPRESSOR1 LZ4_compress_HC #define DEFAULTCOMPRESSOR COMPRESSOR0 diff --git a/tests/Makefile b/tests/Makefile index 0e7924a89..81033b59b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -265,9 +265,8 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat # ./datagen -g20KB generates the same file every single time # cannot save output of ./datagen -g20KB as input file to lz4 because the following shell commands are run before ./datagen -g20KB test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9 + test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1 test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 - test "$(shell ./datagen -g20KB | $(LZ4) -c -3 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 3 vs -1 - test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=2 | wc -c)" # 1 vs -2 ! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0 ! 
$(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1 @$(RM) tmp-tlb* From 98c0c7cf9d0f2429d01e614ebd1d63a6b356f31f Mon Sep 17 00:00:00 2001 From: Cedric De Brito Date: Mon, 2 Jul 2018 14:35:04 +0200 Subject: [PATCH 19/45] Fix bug in frame decompress example The decompression was failing as the srcEnd pointer in decompress_file_internal was wrongly computed beyond the end of the memory block. We need to account for the fact that the header ("info") was already read in the calling function ("alreadyConsumed"). --- examples/frameCompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/frameCompress.c b/examples/frameCompress.c index 9bfea483f..0f8910d42 100644 --- a/examples/frameCompress.c +++ b/examples/frameCompress.c @@ -260,7 +260,7 @@ decompress_file_allocDst(FILE* f_in, FILE* f_out, int const decompressionResult = decompress_file_internal( f_in, f_out, dctx, - src, srcCapacity, readSize, consumedSize, + src, srcCapacity, readSize-consumedSize, consumedSize, dst, dstCapacity); free(dst); From 4eca78b5c3ee689425e3a547e7d2bdd246b89f43 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 10 Jul 2018 11:44:03 -0700 Subject: [PATCH 20/45] Fix LZ4_compress_fast_continue() docs Fixes #549. --- lib/lz4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4.h b/lib/lz4.h index 7d1312219..64914abf9 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -266,7 +266,7 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. * - * Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory! + * Important : The previous 64KB of source data is assumed to remain present and unmodified in memory! * * Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. 
* Make sure that buffers are separated by at least one byte. From 3358e169ee1ce51cfd5fa013391b938c33c2ea8b Mon Sep 17 00:00:00 2001 From: Jack Luo Date: Sat, 28 Jul 2018 22:12:55 -0400 Subject: [PATCH 21/45] fixed spelling mistake in lz4_manual --- doc/lz4_manual.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index e5044fe70..3fc71e4af 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -214,7 +214,7 @@

1.8.2 Manual

int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 

An LZ4_streamDecode_t context can be allocated once and re-used multiple times. Use this function to start decompression of a new stream of blocks. - A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + A dictionary can optionally be set. Use NULL or size 0 for a reset order. Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. @return : 1 if OK, 0 if error From 2e52f03a12b8bb5fba8a86e063e0211e8afa503b Mon Sep 17 00:00:00 2001 From: Jack Luo Date: Sat, 28 Jul 2018 22:21:57 -0400 Subject: [PATCH 22/45] fixed spelling mistake in lz4.h --- lib/lz4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4.h b/lib/lz4.h index 64914abf9..a0eddcead 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -305,7 +305,7 @@ LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str /*! LZ4_setStreamDecode() : * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. - * A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. 
* @return : 1 if OK, 0 if error */ From 672ff590b2e0d1cc9cb237d05af8490e68f9e94d Mon Sep 17 00:00:00 2001 From: Joel Johnson Date: Thu, 2 Aug 2018 00:23:00 -0600 Subject: [PATCH 23/45] Add CMake option to not build legacy lz4c program --- contrib/cmake_unofficial/CMakeLists.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/contrib/cmake_unofficial/CMakeLists.txt b/contrib/cmake_unofficial/CMakeLists.txt index 27c3a7881..b09c4fb0e 100644 --- a/contrib/cmake_unofficial/CMakeLists.txt +++ b/contrib/cmake_unofficial/CMakeLists.txt @@ -12,6 +12,8 @@ set(LZ4_TOP_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") +option(LZ4_BUILD_LEGACY_LZ4C "Build lz4c progam with legacy argument support" ON) + # Parse version information file(STRINGS "${LZ4_TOP_SOURCE_DIR}/lib/lz4.h" LZ4_VERSION_MAJOR REGEX "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$") string(REGEX REPLACE "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$" "\\1" LZ4_VERSION_MAJOR "${LZ4_VERSION_MAJOR}") @@ -122,14 +124,18 @@ else() endif() # lz4 +set(LZ4_PROGRAMS_BUILT lz4cli) add_executable(lz4cli ${LZ4_CLI_SOURCES}) set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4) target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY}) # lz4c -add_executable(lz4c ${LZ4_CLI_SOURCES}) -set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") -target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +if (LZ4_BUILD_LEGACY_LZ4C) + list(APPEND LZ4_PROGRAMS_BUILT lz4c) + add_executable(lz4c ${LZ4_CLI_SOURCES}) + set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") + target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +endif() # Extra warning flags include (CheckCCompilerFlag) @@ -165,7 +171,7 @@ endforeach (flag) if(NOT LZ4_BUNDLED_MODE) include(GNUInstallDirs) - install(TARGETS lz4cli lz4c + install(TARGETS ${LZ4_PROGRAMS_BUILT} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS ${LZ4_LIBRARIES_BUILT} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" 
From 9c457ccb7aed26edda7a3299207a4f329057866d Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Mon, 13 Aug 2018 14:17:54 +0900 Subject: [PATCH 24/45] Add missing $(EXT) --- programs/Makefile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index 72bdcaac1..9cc2d948d 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -82,7 +82,7 @@ lz4-release: DEBUGFLAGS= lz4-release: lz4 lz4c: lz4 - ln -s lz4 lz4c + ln -s lz4$(EXT) lz4c$(EXT) lz4c32: CFLAGS += -m32 lz4c32 : $(SRCFILES) @@ -102,7 +102,7 @@ preview-man: clean-man man clean: @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) @$(RM) core *.o *.test tmp* \ - lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4 lz4cat + lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4$(EXT) lz4cat$(EXT) @echo Cleaning completed @@ -112,10 +112,10 @@ clean: ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) unlz4: lz4 - ln -s lz4 unlz4 + ln -s lz4$(EXT) unlz4$(EXT) lz4cat: lz4 - ln -s lz4 lz4cat + ln -s lz4$(EXT) lz4cat$(EXT) DESTDIR ?= # directory variables : GNU conventions prefer lowercase @@ -147,10 +147,10 @@ INSTALL_DATA ?= $(INSTALL) -m 644 install: lz4 @echo Installing binaries @$(INSTALL) -d -m 755 $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/ - @$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(bindir)/lz4 - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4c - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4cat - @ln -sf lz4 $(DESTDIR)$(bindir)/unlz4 + @$(INSTALL_PROGRAM) lz4$(EXT) $(DESTDIR)$(bindir)/lz4$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4c$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/unlz4$(EXT) @echo Installing man pages @$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1 @ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4c.1 @@ -159,10 +159,10 @@ install: lz4 @echo lz4 installation completed uninstall: - @$(RM) $(DESTDIR)$(bindir)/lz4cat - @$(RM) $(DESTDIR)$(bindir)/unlz4 - @$(RM) $(DESTDIR)$(bindir)/lz4 - @$(RM) 
$(DESTDIR)$(bindir)/lz4c + @$(RM) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @$(RM) $(DESTDIR)$(bindir)/unlz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4c$(EXT) @$(RM) $(DESTDIR)$(man1dir)/lz4.1 @$(RM) $(DESTDIR)$(man1dir)/lz4c.1 @$(RM) $(DESTDIR)$(man1dir)/lz4cat.1 From d2d566097031e5119980eb5ebbfa47023bc33c55 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 4 Sep 2018 16:50:34 -0700 Subject: [PATCH 25/45] new test program : roundTripTest make a round trip test with arbitrary input file, generate an `abort()` on error, to work in tandem with `afl`. note : currently locked on level 9, to investigate #560. --- tests/.gitignore | 1 + tests/Makefile | 5 +- tests/roundTripTest.c | 203 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 tests/roundTripTest.c diff --git a/tests/.gitignore b/tests/.gitignore index 36dff4207..58947f7a4 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -8,6 +8,7 @@ fullbench32 fuzzer fuzzer32 fasttest +roundTripTest checkTag # test artefacts diff --git a/tests/Makefile b/tests/Makefile index 81033b59b..bc432341d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -63,7 +63,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest datagen +all: fullbench fuzzer frametest roundTripTest datagen all32: CFLAGS+=-m32 all32: all @@ -103,6 +103,9 @@ fuzzer : lz4.o lz4hc.o xxhash.o fuzzer.c frametest: lz4frame.o lz4.o lz4hc.o xxhash.o frametest.c $(CC) $(FLAGS) $^ -o $@$(EXT) +roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c new file mode 100644 index 000000000..2f161d84f --- /dev/null +++ b/tests/roundTripTest.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This program takes a file in input, + * performs an LZ4 round-trip test (compression - decompress) + * compares the result with original + * and generates an abort() on corruption detection, + * in order for afl to register the event as a crash. +*/ + + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include /* strcmp */ +#include +#include /* stat */ +#include /* stat */ +#include "xxhash.h" + +#include "lz4.h" +#include "lz4hc.h" + + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MSG(...) fprintf(stderr, __VA_ARGS__) + +#define CONTROL_MSG(c, ...) 
{ \ + if ((c)) { \ + MSG(__VA_ARGS__); \ + MSG(" \n"); \ + abort(); \ + } \ +} + + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* const ip1 = (const char*)buff1; + const char* const ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos= LZ4_compressBound(srcSize)` + * for compression to be guaranteed to work */ +static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcSize) +{ + const int minCLevel = 1; + const int maxClevel = 12; + const int cLevelSpan = maxClevel - minCLevel; + size_t const hashLength = MIN(16, srcSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const randL = h32 % (cLevelSpan+1); + int const cLevel = minCLevel + randL; + int const realCLevel = (cLevel * 0) + 9; /* <=== Currently : only test level 9 */ + int const cSize = LZ4_compress_HC(srcBuff, compressedBuff, srcSize, compressedBuffCapacity, realCLevel); + CONTROL_MSG(cSize == 0, "Compression error !"); + + { int const dSize = LZ4_decompress_safe(compressedBuff, resultBuff, cSize, resultBuffCapacity); + CONTROL_MSG(dSize < 0, "Decompression detected an error !"); + CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !"); + } + + /* check potential content corruption error */ + assert(resultBuffCapacity >= srcSize); + { size_t const errorPos = checkBuffers(srcBuff, resultBuff, srcSize); + CONTROL_MSG(errorPos != srcSize, + "Silent decoding corruption, at pos %u !!!", + (unsigned)errorPos); + } + +} + +static void roundTripCheck(const void* srcBuff, size_t srcSize) +{ + size_t const cBuffSize = LZ4_compressBound(srcSize); + void* const cBuff = malloc(cBuffSize); + void* const rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! 
\n"); + exit(1); + } + + roundTripTest(rBuff, cBuffSize, + cBuff, cBuffSize, + srcBuff, srcSize); + + free(rBuff); + free(cBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : + * requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + fprintf(stderr, "Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + fprintf(stderr, "Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + fprintf(stderr, "Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName) +{ + size_t const fileSize = getFileSize(fileName); + void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); + if (!buffer) { + fprintf(stderr, "not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize); + free (buffer); +} + + +int main(int argCount, const char** argv) +{ + int const argNb = 1; + + if (argCount < 2) { + fprintf(stderr, "Error : no argument : need input file \n"); + exit(9); + } + + fileCheck(argv[argNb]); + fprintf(stderr, "no pb detected 
\n"); + return 0; +} From 943fa6244a3985541bc93250d35ea4f4517fb266 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 4 Sep 2018 17:37:56 -0700 Subject: [PATCH 26/45] fix minor cast warning for C++ compilation --- tests/.gitignore | 5 ++++- tests/roundTripTest.c | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/.gitignore b/tests/.gitignore index 58947f7a4..9aa42a064 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,5 +1,5 @@ -# test build artefacts +# build artefacts datagen frametest frametest32 @@ -14,3 +14,6 @@ checkTag # test artefacts tmp* versionsTest + +# local tests +afl diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c index 2f161d84f..2caa2bc25 100644 --- a/tests/roundTripTest.c +++ b/tests/roundTripTest.c @@ -82,10 +82,10 @@ static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, int const randL = h32 % (cLevelSpan+1); int const cLevel = minCLevel + randL; int const realCLevel = (cLevel * 0) + 9; /* <=== Currently : only test level 9 */ - int const cSize = LZ4_compress_HC(srcBuff, compressedBuff, srcSize, compressedBuffCapacity, realCLevel); + int const cSize = LZ4_compress_HC((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, realCLevel); CONTROL_MSG(cSize == 0, "Compression error !"); - { int const dSize = LZ4_decompress_safe(compressedBuff, resultBuff, cSize, resultBuffCapacity); + { int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity); CONTROL_MSG(dSize < 0, "Decompression detected an error !"); CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !"); } @@ -102,7 +102,7 @@ static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, static void roundTripCheck(const void* srcBuff, size_t srcSize) { - size_t const cBuffSize = LZ4_compressBound(srcSize); + size_t const cBuffSize = LZ4_compressBound((int)srcSize); void* const cBuff = 
malloc(cBuffSize); void* const rBuff = malloc(cBuffSize); From 2e4847c2d55fb4c4f5dc833e61b5f374da8407e6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 4 Sep 2018 18:21:40 -0700 Subject: [PATCH 27/45] fixed #560 it was a fairly complex scenario, involving source files > 64K and some extraordinary conditions related to specific layout of ranges of zeroes. and only on level 9. --- lib/lz4hc.c | 4 ++++ programs/bench.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 8108ea011..236dc5b3c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -327,6 +327,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { + assert(base + matchIndex < ip); + if (ip - (base+matchIndex) > MAX_DISTANCE) break; assert(maxML < 2 GB); longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ @@ -450,6 +452,8 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( *op += length; /* Encode Offset */ + assert(*ip > match); + assert( (*ip - match) <= MAX_DISTANCE ); LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ diff --git a/programs/bench.c b/programs/bench.c index 9dc31c4ce..11bf0440c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -329,7 +329,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); if (crcOrig!=crcCheck) { size_t u; - DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + DISPLAY("\n!!! WARNING !!! 
%17s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u Date: Tue, 4 Sep 2018 18:23:21 -0700 Subject: [PATCH 28/45] made roundTripTest fully general no longer "locked" on level 9 --- tests/roundTripTest.c | 101 ++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c index 2caa2bc25..2d344518e 100644 --- a/tests/roundTripTest.c +++ b/tests/roundTripTest.c @@ -10,13 +10,22 @@ /* * This program takes a file in input, - * performs an LZ4 round-trip test (compression - decompress) + * performs an LZ4 round-trip test (compress + decompress) * compares the result with original * and generates an abort() on corruption detection, * in order for afl to register the event as a crash. */ +/*=========================================== +* Tuning Constant +*==========================================*/ +#ifndef MIN_CLEVEL +# define MIN_CLEVEL (int)(-5) +#endif + + + /*=========================================== * Dependencies *==========================================*/ @@ -62,27 +71,42 @@ static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize return pos; } + +/* select a compression level + * based on first bytes present in a reference buffer */ +static int select_clevel(const void* refBuff, size_t refBuffSize) +{ + const int minCLevel = MIN_CLEVEL; + const int maxClevel = LZ4HC_CLEVEL_MAX; + const int cLevelSpan = maxClevel - minCLevel; + size_t const hashLength = MIN(16, refBuffSize); + unsigned const h32 = XXH32(refBuff, hashLength, 0); + int const randL = h32 % (cLevelSpan+1); + + return minCLevel + randL; +} + + +typedef int (*compressFn)(const char* src, char* dst, int srcSize, int dstSize, int cLevel); + + /** roundTripTest() : * Compresses `srcBuff` into `compressedBuff`, * then decompresses `compressedBuff` into `resultBuff`. - * Compression level used is derived from content's head bytes. 
- * This function abort() if it detects any round-trip error, - * so if it returns, round trip is considered successfully validated. + * If clevel==0, compression level is derived from srcBuff's content head bytes. + * This function abort() if it detects any round-trip error. + * Therefore, if it returns, round trip is considered successfully validated. * Note : `compressedBuffCapacity` should be `>= LZ4_compressBound(srcSize)` * for compression to be guaranteed to work */ static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, void* compressedBuff, size_t compressedBuffCapacity, - const void* srcBuff, size_t srcSize) + const void* srcBuff, size_t srcSize, + int clevel) { - const int minCLevel = 1; - const int maxClevel = 12; - const int cLevelSpan = maxClevel - minCLevel; - size_t const hashLength = MIN(16, srcSize); - unsigned const h32 = XXH32(srcBuff, hashLength, 0); - int const randL = h32 % (cLevelSpan+1); - int const cLevel = minCLevel + randL; - int const realCLevel = (cLevel * 0) + 9; /* <=== Currently : only test level 9 */ - int const cSize = LZ4_compress_HC((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, realCLevel); + int const proposed_clevel = clevel ? clevel : select_clevel(srcBuff, srcSize); + int const selected_clevel = proposed_clevel < 0 ? -proposed_clevel : proposed_clevel; /* if level < 0, it becomes an accelearion value */ + compressFn compress = selected_clevel >= LZ4HC_CLEVEL_MIN ? 
LZ4_compress_HC : LZ4_compress_fast; + int const cSize = compress((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, selected_clevel); CONTROL_MSG(cSize == 0, "Compression error !"); { int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity); @@ -100,7 +124,7 @@ static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, } -static void roundTripCheck(const void* srcBuff, size_t srcSize) +static void roundTripCheck(const void* srcBuff, size_t srcSize, int clevel) { size_t const cBuffSize = LZ4_compressBound((int)srcSize); void* const cBuff = malloc(cBuffSize); @@ -113,7 +137,8 @@ static void roundTripCheck(const void* srcBuff, size_t srcSize) roundTripTest(rBuff, cBuffSize, cBuff, cBuffSize, - srcBuff, srcSize); + srcBuff, srcSize, + clevel); free(rBuff); free(cBuff); @@ -158,46 +183,66 @@ static void loadFile(void* buffer, const char* fileName, size_t fileSize) { FILE* const f = fopen(fileName, "rb"); if (isDirectory(fileName)) { - fprintf(stderr, "Ignoring %s directory \n", fileName); + MSG("Ignoring %s directory \n", fileName); exit(2); } if (f==NULL) { - fprintf(stderr, "Impossible to open %s \n", fileName); + MSG("Impossible to open %s \n", fileName); exit(3); } { size_t const readSize = fread(buffer, 1, fileSize, f); if (readSize != fileSize) { - fprintf(stderr, "Error reading %s \n", fileName); + MSG("Error reading %s \n", fileName); exit(5); } } fclose(f); } -static void fileCheck(const char* fileName) +static void fileCheck(const char* fileName, int clevel) { size_t const fileSize = getFileSize(fileName); void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); if (!buffer) { - fprintf(stderr, "not enough memory \n"); + MSG("not enough memory \n"); exit(4); } loadFile(buffer, fileName, fileSize); - roundTripCheck(buffer, fileSize); + roundTripCheck(buffer, fileSize, clevel); free (buffer); } +int bad_usage(const char* exeName) +{ + MSG(" 
\n"); + MSG("bad usage: \n"); + MSG(" \n"); + MSG("%s [Options] fileName \n", exeName); + MSG(" \n"); + MSG("Options: \n"); + MSG("-# : use #=[0-9] compression level (default:0 == random) \n"); + return 1; +} + + int main(int argCount, const char** argv) { - int const argNb = 1; + const char* const exeName = argv[0]; + int argNb = 1; + int clevel = 0; + + assert(argCount >= 1); + if (argCount < 2) return bad_usage(exeName); - if (argCount < 2) { - fprintf(stderr, "Error : no argument : need input file \n"); - exit(9); + if (argv[1][0] == '-') { + clevel = argv[1][1] - '0'; + argNb = 2; } - fileCheck(argv[argNb]); - fprintf(stderr, "no pb detected \n"); + if (argNb >= argCount) return bad_usage(exeName); + + fileCheck(argv[argNb], clevel); + MSG("no pb detected \n"); return 0; } From 30f6f34328733ec4e74c78c06f667810db0417df Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 11:25:10 -0700 Subject: [PATCH 29/45] removed one assert() condition which is not correct when using LZ4_HC with dictionary and starting from a low address (<0x10000). --- lib/lz4hc.c | 3 +-- tests/fuzzer.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 236dc5b3c..e913ee7b3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -452,8 +452,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( *op += length; /* Encode Offset */ - assert(*ip > match); - assert( (*ip - match) <= MAX_DISTANCE ); + assert( (*ip - match) <= MAX_DISTANCE ); /* note : consider providing offset as a value, rather than as a pointer difference */ LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 5a92f8f06..6c7951596 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -323,12 +323,17 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c int result = 0; unsigned cycleNb; -# define FUZ_CHECKTEST(cond, ...) 
if (cond) { printf("Test %u : ", testNb); printf(__VA_ARGS__); \ - printf(" (seed %u, cycle %u) \n", seed, cycleNb); goto _output_error; } +# define FUZ_CHECKTEST(cond, ...) \ + if (cond) { \ + printf("Test %u : ", testNb); printf(__VA_ARGS__); \ + printf(" (seed %u, cycle %u) \n", seed, cycleNb); \ + goto _output_error; \ + } + # define FUZ_DISPLAYTEST(...) { \ testNb++; \ if (g_displayLevel>=4) { \ - printf("\r%4u - %2u ", cycleNb, testNb); \ + printf("\r%4u - %2u :", cycleNb, testNb); \ printf(" " __VA_ARGS__); \ printf(" "); \ fflush(stdout); \ @@ -805,7 +810,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)"); FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer"); { U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2; if ((U32)blockSize > missingBytes) { decodedBuffer[blockSize-missingBytes] = 0; @@ -815,7 +820,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c } } /* Compress HC using External dictionary */ - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary"); dict -= (FUZ_rand(&randState) & 7); /* even bigger separation */ if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; LZ4_resetStreamHC (&LZ4dictHC, compressionLevel); From 0fea528e3a70f8578ca6e7f15d922dab8aa9ff25 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 14:05:08 -0700 Subject: [PATCH 30/45] updated documentation regarding dictionary compression following suggestion from @stbrumme (#558) Also : bumped version number, regenerated man page and html doc --- NEWS | 6 ++++++ README.md | 23 ++++++++++++++--------- doc/lz4_manual.html | 6 +++--- doc/lz4frame_manual.html | 4 ++-- lib/lz4.h | 2 +- 
programs/lz4.1 | 14 +++++++++++--- programs/lz4.1.md | 20 +++++++++++++------- programs/lz4cli.c | 2 +- tests/.gitignore | 6 +++++- tests/Makefile | 3 ++- 10 files changed, 58 insertions(+), 28 deletions(-) diff --git a/NEWS b/NEWS index 0139e6123..8ee3c92d4 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +v1.8.3 +fix : data corruption for files > 64KB at level 9 under specific conditions (#560) +cli : new command --fast, by @jennifermliu +build : added Haiku target, by @fbrosson +doc : updated documentation regarding dictionary compression + v1.8.2 perf: *much* faster dictionary compression on small files, by @felixhandte perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv) diff --git a/README.md b/README.md index 406792a11..e64020d1c 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,23 @@ LZ4 - Extremely fast compression ================================ LZ4 is lossless compression algorithm, -providing compression speed at 400 MB/s per core, +providing compression speed > 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. Speed can be tuned dynamically, selecting an "acceleration" factor -which trades compression ratio for more speed up. +which trades compression ratio for faster speed. On the other end, a high compression derivative, LZ4_HC, is also provided, trading CPU time for improved compression ratio. All versions feature the same decompression speed. +LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression), +and can ingest any input file as dictionary, +including those created by [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder). +(note: only the final 64KB are used). + LZ4 library is provided as open-source software using BSD 2-Clause license. 
@@ -67,8 +72,8 @@ in single-thread mode. [zlib]: http://www.zlib.net/ [Zstandard]: http://www.zstd.net/ -LZ4 is also compatible and well optimized for x32 mode, -for which it provides some additional speed performance. +LZ4 is also compatible and optimized for x32 mode, +for which it provides additional speed performance. Installation @@ -76,7 +81,7 @@ Installation ``` make -make install # this command may require root access +make install # this command may require root permissions ``` LZ4's `Makefile` supports standard [Makefile conventions], @@ -94,10 +99,10 @@ Documentation The raw LZ4 block compression format is detailed within [lz4_Block_format]. -To compress an arbitrarily long file or data stream, multiple blocks are required. -Organizing these blocks and providing a common header format to handle their content -is the purpose of the Frame format, defined into [lz4_Frame_format]. -Interoperable versions of LZ4 must respect this frame format. +Arbitrarily long files or data streams are compressed using multiple blocks, +for streaming requirements. These blocks are organized into a frame, +defined into [lz4_Frame_format]. +Interoperable versions of LZ4 must also respect the frame format. [lz4_Block_format]: doc/lz4_Block_format.md [lz4_Frame_format]: doc/lz4_Frame_format.md diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index 3fc71e4af..c7c576346 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -1,10 +1,10 @@ -1.8.2 Manual +1.8.3 Manual -

1.8.2 Manual

+

1.8.3 Manual


Contents

    @@ -179,7 +179,7 @@

    1.8.2 Manual

    'dst' buffer must be already allocated. If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory! + Important : The previous 64KB of source data is assumed to remain present and unmodified in memory! Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. Make sure that buffers are separated by at least one byte. diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index 53ea7eb19..fb8e0ceb3 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -1,10 +1,10 @@ -1.8.2 Manual +1.8.3 Manual -

    1.8.2 Manual

    +

    1.8.3 Manual


    Contents

      diff --git a/lib/lz4.h b/lib/lz4.h index a0eddcead..491b67a2a 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -93,7 +93,7 @@ extern "C" { /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 8 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) diff --git a/programs/lz4.1 b/programs/lz4.1 index e0f6a81b3..f35e29d2b 100644 --- a/programs/lz4.1 +++ b/programs/lz4.1 @@ -1,5 +1,5 @@ . -.TH "LZ4" "1" "2018-01-13" "lz4 1.8.1" "User Commands" +.TH "LZ4" "1" "September 2018" "lz4 1.8.3" "User Commands" . .SH "NAME" \fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files @@ -115,7 +115,11 @@ Benchmark mode, using \fB#\fR compression level\. . .TP \fB\-#\fR -Compression level, with # being any value from 1 to 16\. Higher values trade compression speed for compression ratio\. Values above 16 are considered the same as 16\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\. +Compression level, with # being any value from 1 to 12\. Higher values trade compression speed for compression ratio\. Values above 12 are considered the same as 12\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\. +. +.TP +\fB\-D dictionaryName\fR +Compress, decompress or benchmark using dictionary \fIdictionaryName\fR\. Compression and decompression must use the same dictionary to be compatible\. 
Using a different dictionary during decompression will either abort due to decompression error, or generate a checksum error\. . .TP \fB\-f\fR \fB\-\-[no\-]force\fR @@ -151,6 +155,10 @@ Block size [4\-7](default : 7) Block Dependency (improves compression ratio on small blocks) . .TP +\fB\-\-fast[=#]\fR +switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\. +. +.TP \fB\-\-[no\-]frame\-crc\fR Select frame checksum (default:enabled) . @@ -214,7 +222,7 @@ Benchmark multiple compression levels, from b# to e# (included) . .TP \fB\-i#\fR -Minimum evaluation in seconds [1\-9] (default : 3) +Minimum evaluation time in seconds [1\-9] (default : 3) . .SH "BUGS" Report bugs at: https://github\.com/lz4/lz4/issues diff --git a/programs/lz4.1.md b/programs/lz4.1.md index d4eaf8aab..12b8e29de 100644 --- a/programs/lz4.1.md +++ b/programs/lz4.1.md @@ -125,6 +125,19 @@ only the latest one will be applied. Speed/compression trade-off will vary depending on data to compress. Decompression speed remains fast at all settings. +* `--fast[=#]`: + switch to ultra-fast compression levels. + The higher the value, the faster the compression speed, at the cost of some compression ratio. + If `=#` is not present, it defaults to `1`. + This setting overrides compression level if one was set previously. + Similarly, if a compression level is set after `--fast`, it overrides it. + +* `-D dictionaryName`: + Compress, decompress or benchmark using dictionary _dictionaryName_. + Compression and decompression must use the same dictionary to be compatible. + Using a different dictionary during decompression will either + abort due to decompression error, or generate a checksum error. 
+ * `-f` `--[no-]force`: This option has several effects: @@ -156,13 +169,6 @@ only the latest one will be applied. * `-BD`: Block Dependency (improves compression ratio on small blocks) -* `--fast[=#]`: - switch to ultra-fast compression levels. - If `=#` is not present, it defaults to `1`. - The higher the value, the faster the compression speed, at the cost of some compression ratio. - This setting overwrites compression level if one was set previously. - Similarly, if a compression level is set after `--fast`, it overrides it. - * `--[no-]frame-crc`: Select frame checksum (default:enabled) diff --git a/programs/lz4cli.c b/programs/lz4cli.c index dc60b00bb..26a8089bd 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -110,7 +110,7 @@ static int usage(const char* exeName) DISPLAY( " -9 : High compression \n"); DISPLAY( " -d : decompression (default for %s extension)\n", LZ4_EXTENSION); DISPLAY( " -z : force compression \n"); - DISPLAY( " -D FILE: use dictionary in FILE \n"); + DISPLAY( " -D FILE: use FILE as dictionary \n"); DISPLAY( " -f : overwrite output without prompting \n"); DISPLAY( " -k : preserve source files(s) (default) \n"); DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); diff --git a/tests/.gitignore b/tests/.gitignore index 36dff4207..9aa42a064 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,5 +1,5 @@ -# test build artefacts +# build artefacts datagen frametest frametest32 @@ -8,8 +8,12 @@ fullbench32 fuzzer fuzzer32 fasttest +roundTripTest checkTag # test artefacts tmp* versionsTest + +# local tests +afl diff --git a/tests/Makefile b/tests/Makefile index 81033b59b..f270c46de 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -114,7 +114,8 @@ clean: fullbench$(EXT) fullbench32$(EXT) \ fuzzer$(EXT) fuzzer32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ - fasttest$(EXT) datagen$(EXT) checkTag$(EXT) + fasttest$(EXT) roundTripTest$(EXT) \ + datagen$(EXT) checkTag$(EXT) @rm -fR $(TESTDIR) @echo Cleaning 
completed From b61991491ab000229d39028c9f80939721825c72 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 15:16:03 -0700 Subject: [PATCH 31/45] fixed compression time displayed at the end (#555) --- programs/lz4io.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index b52c1f32f..28d6537b5 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -628,16 +628,23 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, /* Copy owner, file permissions and modification time */ { stat_t statbuf; - if (strcmp (srcFileName, stdinmark) && strcmp (dstFileName, stdoutmark) && strcmp (dstFileName, nulmark) && UTIL_getFileStat(srcFileName, &statbuf)) + if (strcmp (srcFileName, stdinmark) + && strcmp (dstFileName, stdoutmark) + && strcmp (dstFileName, nulmark) + && UTIL_getFileStat(srcFileName, &statbuf)) { UTIL_setFileStat(dstFileName, &statbuf); - } + } } - if (g_removeSrcFile) { if (remove(srcFileName)) EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno)); } /* remove source file : --rm */ + if (g_removeSrcFile) { /* remove source file : --rm */ + if (remove(srcFileName)) + EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno)); + } /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", - filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100); /* avoid division by zero */ + filesize, compressedfilesize, + (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100); return 0; } @@ -645,21 +652,25 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel) { - clock_t const start = clock(); + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); 
cRess_t const ress = LZ4IO_createCResources(); - int const issueWithSrcFile = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); + int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); /* Free resources */ LZ4IO_freeCResources(ress); /* Final Status */ - { clock_t const end = clock(); - double const seconds = (double)(end - start) / CLOCKS_PER_SEC; - DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = (double)timeLength_ns / 1000000000; + DISPLAYLEVEL(4, "Completed in %.2f sec (cpu load : %.0f%%)\n", + timeLength_s, (cpuLoad_s / timeLength_s) * 100); } - return issueWithSrcFile; + return result; } From b2e56d82bf73f50484c9cf71e81c1af4ebe3cc0b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 16:04:05 -0700 Subject: [PATCH 32/45] Introduced constants LZ4F_INIT_* to simplify initialization of lz4frame.h structures. Partially in response to #546. 
--- examples/frameCompress.c | 21 +++++++++++---------- lib/lz4frame.h | 26 +++++++++++++++++--------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/examples/frameCompress.c b/examples/frameCompress.c index 9bfea483f..6580e9642 100644 --- a/examples/frameCompress.c +++ b/examples/frameCompress.c @@ -31,12 +31,13 @@ static const LZ4F_preferences_t kPrefs = { static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f) { size_t const writtenSize = fwrite(buf, eltSize, nbElt, f); - size_t const expectedSize = eltSize * nbElt; /* note : should check for overflow */ + size_t const expectedSize = eltSize * nbElt; + assert(expectedSize / nbElt == eltSize); /* check overflow */ if (writtenSize < expectedSize) { if (ferror(f)) /* note : ferror() must follow fwrite */ - printf("Write failed\n"); + fprintf(stderr, "Write failed \n"); else - printf("Short write\n"); + fprintf(stderr, "Short write \n"); exit(1); } } @@ -54,9 +55,9 @@ typedef struct { static compressResult_t compress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_compressionContext_t ctx, - void* inBuff, size_t inChunkSize, - void* outBuff, size_t outCapacity) + LZ4F_compressionContext_t ctx, + void* inBuff, size_t inChunkSize, + void* outBuff, size_t outCapacity) { compressResult_t result = { 1, 0, 0 }; /* result for an error */ unsigned long long count_in = 0, count_out; @@ -167,9 +168,9 @@ static size_t get_block_size(const LZ4F_frameInfo_t* info) { /* @return : 1==error, 0==success */ static int decompress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_dctx* dctx, - void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, - void* dst, size_t dstCapacity) + LZ4F_dctx* dctx, + void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, + void* dst, size_t dstCapacity) { int firstChunk = 1; size_t ret = 1; @@ -278,7 +279,7 @@ static int decompress_file(FILE* f_in, FILE* f_out) if (!src) { perror("decompress_file(src)"); return 1; } LZ4F_dctx* dctx; - { size_t 
const dctxStatus = LZ4F_createDecompressionContext(&dctx, 100); + { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); if (LZ4F_isError(dctxStatus)) { printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus)); } } diff --git a/lib/lz4frame.h b/lib/lz4frame.h index fb434ff76..2a67c69e8 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -33,7 +33,7 @@ */ /* LZ4F is a stand-alone API to create LZ4-compressed frames - * conformant with specification v1.5.1. + * conformant with specification v1.6.1. * It also offers streaming capabilities. * lz4.h is not required when using lz4frame.h. * */ @@ -159,8 +159,9 @@ typedef LZ4F_contentChecksum_t contentChecksum_t; /*! LZ4F_frameInfo_t : * makes it possible to set or read frame parameters. - * It's not required to set all fields, as long as the structure was initially memset() to zero. - * For all fields, 0 sets it to default value */ + * Structure must be first init to 0, using memset() or LZ4F_INIT_FRAMEINFO, + * setting all parameters to default. + * It's then possible to update selectively some parameters */ typedef struct { LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB; 0 == default */ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */ @@ -171,24 +172,30 @@ typedef struct { LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */ } LZ4F_frameInfo_t; +#define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } + /*! LZ4F_preferences_t : - * makes it possible to supply detailed compression parameters to the stream interface. - * Structure is presumed initially memset() to zero, representing default settings. + * makes it possible to supply advanced compression instructions to streaming interface. + * Structure must be first init to 0, using memset() or LZ4F_INIT_PREFERENCES, + * setting all parameters to default. 
* All reserved fields must be set to zero. */ typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */ - unsigned autoFlush; /* 1: always flush, to reduce usage of internal buffers */ - unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */ /* >= v1.8.2 */ + unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */ + unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* >= v1.8.2 */ unsigned reserved[3]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; -LZ4FLIB_API int LZ4F_compressionLevel_max(void); +#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } /*-********************************* * Simple compression function ***********************************/ + +LZ4FLIB_API int LZ4F_compressionLevel_max(void); + /*! LZ4F_compressFrameBound() : * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences. * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences. @@ -222,8 +229,9 @@ typedef struct { /*--- Resource Management ---*/ -#define LZ4F_VERSION 100 +#define LZ4F_VERSION 100 /* API version, signal an API breaking change */ LZ4FLIB_API unsigned LZ4F_getVersion(void); + /*! LZ4F_createCompressionContext() : * The first thing to do is to create a compressionContext object, which will be used in all compression operations. * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version. 
From 858b6ad7f3f9aae072bccc1dbc484cbc2793a45a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 16:45:46 -0700 Subject: [PATCH 33/45] frameCompress : added an error detection case check for potential input data not consumed. --- examples/frameCompress.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/frameCompress.c b/examples/frameCompress.c index a97dc723c..a0c5d3d80 100644 --- a/examples/frameCompress.c +++ b/examples/frameCompress.c @@ -195,7 +195,7 @@ decompress_file_internal(FILE* f_in, FILE* f_out, * Continue while there is more input to read (srcPtr != srcEnd) * and the frame isn't over (ret != 0) */ - while (srcPtr != srcEnd && ret != 0) { + while (srcPtr < srcEnd && ret != 0) { /* Any data within dst has been flushed at this stage */ size_t dstSize = dstCapacity; size_t srcSize = srcEnd - srcPtr; @@ -209,9 +209,20 @@ decompress_file_internal(FILE* f_in, FILE* f_out, /* Update input */ srcPtr += srcSize; } + + assert(srcPtr <= srcEnd); + + /* Ensure all input data has been consumed. + * It is valid to have multiple frames in the same file, + * but this example only supports one frame. + */ + if (srcPtr < srcEnd) { + printf("Decompress: Trailing data left in file after frame\n"); + return 1; + } } - /* Check that there isn't trailing input data after the frame. + /* Check that there isn't trailing data in the file after the frame. * It is valid to have multiple frames in the same file, * but this example only supports one frame. 
*/ From 26c42d7ad1c394538adb76879ec8d209b3ff5c84 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 Sep 2018 17:57:15 -0700 Subject: [PATCH 34/45] added comments on version numbers --- lib/lz4frame.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 2a67c69e8..75f1fd91b 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -35,7 +35,8 @@ /* LZ4F is a stand-alone API to create LZ4-compressed frames * conformant with specification v1.6.1. * It also offers streaming capabilities. - * lz4.h is not required when using lz4frame.h. + * lz4.h is not required when using lz4frame.h, + * except to get constant such as LZ4_VERSION_NUMBER. * */ #ifndef LZ4F_H_09782039843 @@ -172,7 +173,7 @@ typedef struct { LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */ } LZ4F_frameInfo_t; -#define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } +#define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } /* v1.8.3+ */ /*! LZ4F_preferences_t : * makes it possible to supply advanced compression instructions to streaming interface. @@ -183,11 +184,11 @@ typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */ unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */ - unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* >= v1.8.2 */ + unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. 
Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */ unsigned reserved[3]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; -#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } +#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } /* v1.8.3+ */ /*-********************************* @@ -229,7 +230,7 @@ typedef struct { /*--- Resource Management ---*/ -#define LZ4F_VERSION 100 /* API version, signal an API breaking change */ +#define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */ LZ4FLIB_API unsigned LZ4F_getVersion(void); /*! LZ4F_createCompressionContext() : From e32766cc345fb811cdcf25192144c85734bc636e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 7 Sep 2018 11:02:42 -0700 Subject: [PATCH 35/45] updated API documentation --- lib/lz4.c | 4 +-- lib/lz4.h | 102 ++++++++++++++++++++++++++++++------------------------ 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 05454fc09..35df7f5a4 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1394,8 +1394,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ -LZ4_FORCE_O2_GCC_PPC64LE -LZ4_FORCE_INLINE int LZ4_decompress_generic( +LZ4_FORCE_INLINE int +LZ4_decompress_generic( const char* const src, char* const dst, int srcSize, diff --git a/lib/lz4.h b/lib/lz4.h index 491b67a2a..9d3890a61 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -183,55 +183,58 @@ LZ4_compress_fast_extState() : Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. 
+ Then, provide this buffer as 'void* state' to compression function. */ LZ4LIB_API int LZ4_sizeofState(void); LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); -/*! -LZ4_compress_destSize() : - Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. - This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. */ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); -/*! -LZ4_decompress_fast() : **unsafe!** -This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. 
- return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. -*/ +/*! LZ4_decompress_fast() : **unsafe!** + * This function used to be a bit faster than LZ4_decompress_safe(), + * though situation has changed in recent versions, + * and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + * Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + * As a consequence, this function is no longer recommended, and may be deprecated in future versions. + * It's only remaining specificity is that it can decompress data without knowing its compressed size. + * + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : This function requires uncompressed originalSize to be known in advance. + * The function never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read past the intended input. + * Also, because match offsets are not validated during decoding, + * reads from 'src' may underflow. 
+ * Use this function in trusted environment **only**. + */ LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -/*! -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. -*/ +/*! LZ4_decompress_safe_partial() : + * This function decompresses a compressed block of size 'srcSize' at position 'src' + * into destination buffer 'dst' of size 'dstCapacity'. + * The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. + * However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). + * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + * Note : this number can also be < targetOutputSize, if compressed block contains less data. + * If source stream is detected malformed, function returns a negative result. + * This function is protected against malicious data packets. + */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); @@ -266,16 +269,23 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. 
* - * Important : The previous 64KB of source data is assumed to remain present and unmodified in memory! - * - * Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. - * Make sure that buffers are separated by at least one byte. - * This way, each block only depends on previous block. - * Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. - * * @return : size of compressed block * or 0 if there is an error (typically, cannot fit into 'dst'). - * After an error, the stream status is invalid, it can only be reset or freed. + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() will generate a new block. + * Each block has precise boundaries. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata. + * + * Note 2 : The previous 64KB of source data is assumed to remain present, unmodified, at same address in memory! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is invalid, it can only be reset or freed. 
*/ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); From bf614d3c51c9774df0f64285db314545f05bb5ef Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 7 Sep 2018 15:44:19 -0700 Subject: [PATCH 36/45] first sketch for a byte-accurate partial decoder --- lib/lz4.c | 126 +++++++++++++++++++++++++++++----------------- lib/lz4.h | 4 +- tests/fullbench.c | 32 ++++++++---- tests/fuzzer.c | 22 ++++---- 4 files changed, 113 insertions(+), 71 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 35df7f5a4..6febb90d1 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -483,9 +483,6 @@ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - /*-************************************ * Local Utils @@ -684,9 +681,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = dictDirective == usingDictCtx ? - dictionary + dictSize - dictCtx->currentOffset : - dictionary + dictSize - startIndex; + const BYTE* dictBase = (dictDirective == usingDictCtx) ? 
+ dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; @@ -1385,25 +1382,32 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) -/*-***************************** -* Decompression functions -*******************************/ +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + /*! LZ4_decompress_generic() : * This generic decompression function covers all use cases. * It shall be instantiated several times, using different sets of directives. * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ -LZ4_FORCE_INLINE int +LZ4_FORCE_INLINE +int LZ4_decompress_generic( const char* const src, char* const dst, int srcSize, int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ - int endOnInput, /* endOnOutputSize, endOnInputSize */ - int partialDecoding, /* full, partial */ - int targetOutputSize, /* only used if partialDecoding==partial */ + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ int dict, /* noDict, withPrefix64k, usingExtDict */ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ const BYTE* const dictStart, /* only if dict==usingExtDict */ @@ -1416,7 +1420,6 @@ LZ4_decompress_generic( BYTE* op = (BYTE*) dst; BYTE* const oend = op + outputSize; BYTE* cpy; - BYTE* oexit = op + targetOutputSize; const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; @@ -1432,9 +1435,9 @@ 
LZ4_decompress_generic( DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i)", srcSize); /* Special cases */ - if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ + assert(src != NULL); if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1); if ((endOnInput) && unlikely(srcSize==0)) return -1; /* Main Loop : decode sequences */ @@ -1443,7 +1446,7 @@ LZ4_decompress_generic( size_t offset; unsigned const token = *ip++; - size_t length = token >> ML_BITS; /* literal length */ + size_t length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ @@ -1468,6 +1471,7 @@ LZ4_decompress_generic( length = token & ML_MASK; /* match length */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; + assert(match <= op); /* check overflow */ /* Do not deal with overlapping matches. 
*/ if ( (length != ML_MASK) @@ -1501,11 +1505,11 @@ LZ4_decompress_generic( /* copy literals */ cpy = op+length; - if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { if (partialDecoding) { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if (cpy > oend) { cpy = oend; length = oend-op; } /* Partial decoding : stop in the middle of literal segment */ if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ } else { if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ @@ -1514,10 +1518,15 @@ LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; - break; /* Necessarily EOF, due to parsing restrictions */ + if (!partialDecoding || (cpy == oend)) { + /* Necessarily EOF, due to parsing restrictions */ + break; + } + + } else { + LZ4_wildCopy(op, ip, cpy); + ip += length; op = cpy; } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; /* get offset */ offset = LZ4_readLE16(ip); ip+=2; @@ -1541,21 +1550,24 @@ LZ4_decompress_generic( } length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } if (length <= (size_t)(lowPrefix-match)) { - /* match can be copied as a single segment from external dictionary */ + /* match fits entirely within external dictionary : just 
copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { - /* match encompass external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix-match); + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */ + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; @@ -1568,6 +1580,22 @@ LZ4_decompress_generic( /* copy match within block */ cpy = op + length; + + /* specific : partial decode : does not respect end parsing restrictions */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-12)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) *op++ = *match++; + } else { + memcpy(op, match, mlen); + } + op = copyEnd; + continue; + } + if (unlikely(offset<8)) { op[0] = match[0]; op[1] = match[1]; @@ -1576,23 +1604,26 @@ LZ4_decompress_generic( match += inc32table[offset]; memcpy(op+4, match, 4); match -= dec64table[offset]; - } else { memcpy(op, match, 8); match+=8; } + } else { + memcpy(op, match, 8); + match += 8; + } op += 8; - if (unlikely(cpy>oend-12)) { - BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1); + if (unlikely(cpy > oend-12)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { LZ4_wildCopy(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } - while (op16) LZ4_wildCopy(op+8, match+8, cpy); + if (length > 16) LZ4_wildCopy(op+8, match+8, cpy); } 
- op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ @@ -1613,23 +1644,24 @@ LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE -int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, partial, targetOutputSize, - noDict, (BYTE*)dest, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, + endOnOutputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1639,7 +1671,7 @@ LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. 
*/ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, withPrefix64k, + endOnInputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1656,7 +1688,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i size_t prefixSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest-prefixSize, NULL, 0); } @@ -1666,7 +1698,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1675,7 +1707,7 @@ static int LZ4_decompress_fast_extDict(const char* source, char* dest, int origi const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1688,7 +1720,7 @@ int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compresse size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } @@ -1697,7 +1729,7 @@ int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalS size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + 
endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } diff --git a/lib/lz4.h b/lib/lz4.h index 9d3890a61..ce4d033c4 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -272,12 +272,12 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * @return : size of compressed block * or 0 if there is an error (typically, cannot fit into 'dst'). * - * Note 1 : Each invocation to LZ4_compress_fast_continue() will generate a new block. + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. * Each block has precise boundaries. * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. * Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata. * - * Note 2 : The previous 64KB of source data is assumed to remain present, unmodified, at same address in memory! + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory! * * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. * Make sure that buffers are separated, by at least one byte. 
diff --git a/tests/fullbench.c b/tests/fullbench.c index 2818ea28b..fd1202df1 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -317,7 +317,9 @@ static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) { - return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + int result = LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + if (result < 0) return result; + return outSize; } @@ -462,9 +464,9 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break; case 14: compressionFunction = local_LZ4_compress_HC_continue; initFunction = local_LZ4_resetStreamHC; compressorName = "LZ4_compress_HC_continue"; break; #ifndef LZ4_DLL_IMPORT - case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; + case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; #endif - case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; + case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; chunkP[0].origSize = (int)benchedSize; nbChunks=1; break; case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; @@ -542,6 +544,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) const char* dName; int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; + int checkResult = 1; if ((g_decompressionAlgo != ALL_DECOMPRESSORS) && (g_decompressionAlgo != dAlgNb)) continue; @@ -553,11 +556,11 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 3: decompressionFunction = 
local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break; case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break; case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break; - case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break; + case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; checkResult = 0; break; #ifndef LZ4_DLL_IMPORT - case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; + case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; #endif - case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; + case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL); if (LZ4F_isError(errorCode)) { DISPLAY("Error while preparing compressed frame\n"); @@ -589,9 +592,13 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) clockTime = clock(); while(BMK_GetClockSpan(clockTime) < TIMELOOP) { for (chunkNb=0; chunkNb %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 6c7951596..bdb784113 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -504,7 +504,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c /* Test decoding with empty input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input"); - LZ4_decompress_safe(NULL, decodedBuffer, 0, blockSize); + LZ4_decompress_safe(compressedBuffer, decodedBuffer, 0, blockSize); /* Test decoding with a one byte input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input"); @@ 
-545,7 +545,6 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space"); FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data"); - //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than (unknown) target size"); // well, is that an issue ? FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size"); { U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0); FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); @@ -579,15 +578,16 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); - // Test partial decoding with target output size being max/2 => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); - - // Test partial decoding with target output size being just below max => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); + /* Test partial decoding => must work */ + FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); + { size_t const missingBytes = FUZ_rand(&randState) % blockSize; + int const targetSize = (int)(blockSize - missingBytes); + char const sentinel = compressedBuffer[targetSize] = block[targetSize] ^ 0x5A; + int const decResult = 
LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize); + FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data"); + FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(compressedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); + } /* Test Compression with limited output size */ From eaed9ea4a15934e59f74902f0ed692a07151579d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 7 Sep 2018 16:21:31 -0700 Subject: [PATCH 37/45] updated function interface documentation --- doc/lz4_manual.html | 105 ++++++++++++++++++++++++++++---------------- lib/lz4.h | 24 +++++++--- 2 files changed, 86 insertions(+), 43 deletions(-) diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index c7c576346..6ebf8d281 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -110,45 +110,67 @@

      1.8.3 Manual

      Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. + Then, provide this buffer as 'void* state' to compression function.


int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
-

Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. - This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +

Reverse the logic : compresses as much data as possible from 'src' buffer + into already allocated buffer 'dst', of size >= 'targetDestSize'. + This function either compresses the entire 'src' content into 'dst' if it's large enough, + or fill 'dst' buffer completely with as much data as possible from 'src'. + note: acceleration parameter is fixed to "default". + + *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + New value is necessarily <= input value. + @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + or 0 if compression fails.


int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
-

This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. - return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. +

This function used to be a bit faster than LZ4_decompress_safe(), + though situation has changed in recent versions, + and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + As a consequence, this function is no longer recommended, and may be deprecated in future versions. + It's only remaining specificity is that it can decompress data without knowing its compressed size. + + originalSize : is the uncompressed size to regenerate. + `dst` must be already allocated, its size must be >= 'originalSize' bytes. + @return : number of bytes read from source buffer (== compressed size). + If the source stream is detected malformed, the function stops decoding and returns a negative result. + note : This function requires uncompressed originalSize to be known in advance. + The function never writes past the output buffer. + However, since it doesn't know its 'src' size, it may read past the intended input. + Also, because match offsets are not validated during decoding, + reads from 'src' may underflow. + Use this function in trusted environment **only**. +


int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
-

This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. +

Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + into destination buffer 'dst' of size 'dstCapacity'. + Up to 'targetOutputSize' bytes will be decoded. + The function stops decoding on reaching this objective, + which can boost performance when only the beginning of a block is required. + + @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + If source stream is detected malformed, function returns a negative result. + + Note : @return can be < targetOutputSize, if compressed block contains less data. + + Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + and expects targetOutputSize <= dstCapacity. + It effectively stops decoding on reaching targetOutputSize, + so dstCapacity is kind of redundant. + This is because in a previous version of this function, + decoding operation would not "break" a sequence in the middle. + As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + it could write more bytes, though only up to dstCapacity. + Some "margin" used to be required for this operation to work properly. + This is no longer necessary. + The function nonetheless keeps its signature, in an effort to not break API. +


Streaming Compression Functions


@@ -179,16 +201,23 @@ 

1.8.3 Manual

'dst' buffer must be already allocated. If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - Important : The previous 64KB of source data is assumed to remain present and unmodified in memory! - - Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. - Make sure that buffers are separated by at least one byte. - This way, each block only depends on previous block. - Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. - @return : size of compressed block or 0 if there is an error (typically, cannot fit into 'dst'). - After an error, the stream status is invalid, it can only be reset or freed. + + Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + Each block has precise boundaries. + It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata. + + Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory! + + Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + Make sure that buffers are separated, by at least one byte. + This construction ensures that each block only depends on previous block. + + Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + + Note 5 : After an error, the stream status is invalid, it can only be reset or freed.


diff --git a/lib/lz4.h b/lib/lz4.h index ce4d033c4..c8ee206ac 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -226,14 +226,28 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); /*! LZ4_decompress_safe_partial() : - * This function decompresses a compressed block of size 'srcSize' at position 'src' + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', * into destination buffer 'dst' of size 'dstCapacity'. - * The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - * However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective, + * which can boost performance when only the beginning of a block is required. + * * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) - * Note : this number can also be < targetOutputSize, if compressed block contains less data. * If source stream is detected malformed, function returns a negative result. - * This function is protected against malicious data packets. + * + * Note : @return can be < targetOutputSize, if compressed block contains less data. + * + * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + * and expects targetOutputSize <= dstCapacity. + * It effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in a previous version of this function, + * decoding operation would not "break" a sequence in the middle. + * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * This is no longer necessary. 
+ * The function nonetheless keeps its signature, in an effort to not break API. */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); From e22bb8007432d42cda4c83daeb9e97ea15b7ef67 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 7 Sep 2018 18:22:01 -0700 Subject: [PATCH 38/45] fixed fuzzer test and removed one blind copy, since there is no more guarantee that at least 4 bytes are still available in output buffer --- lib/lz4.c | 10 ++++++---- tests/fuzzer.c | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 6febb90d1..dbda4f14d 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1398,8 +1398,7 @@ typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ -LZ4_FORCE_INLINE -int +LZ4_FORCE_INLINE int LZ4_decompress_generic( const char* const src, char* const dst, @@ -1432,7 +1431,7 @@ LZ4_decompress_generic( const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; const BYTE* const shortoend = oend - (endOnInput ? 
14 : 8) /*maxLL*/ - 18 /*maxML*/; - DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i)", srcSize); + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); /* Special cases */ assert(src != NULL); @@ -1537,7 +1536,7 @@ LZ4_decompress_generic( _copy_match: if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ + // LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ /* note : no longer valid with partialDecoding, since there is no guarantee that at least 4 bytes are available */ if (length == ML_MASK) { unsigned s; @@ -1584,15 +1583,18 @@ LZ4_decompress_generic( /* specific : partial decode : does not respect end parsing restrictions */ assert(op<=oend); if (partialDecoding && (cpy > oend-12)) { + DEBUGLOG(2, "match copy close to the end"); size_t const mlen = MIN(length, (size_t)(oend-op)); const BYTE* const matchEnd = match + mlen; BYTE* const copyEnd = op + mlen; if (matchEnd > op) { /* overlap copy */ while (op < copyEnd) *op++ = *match++; } else { + DEBUGLOG(2, "let's memcopy %zu bytes (non overlapping)", mlen); memcpy(op, match, mlen); } op = copyEnd; + if (op==oend) break; continue; } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index bdb784113..d6a5f511e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -582,11 +582,12 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); { size_t const missingBytes = FUZ_rand(&randState) % blockSize; int const targetSize = (int)(blockSize - missingBytes); - char const sentinel = compressedBuffer[targetSize] = block[targetSize] ^ 0x5A; + char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; + assert(decodedBuffer[targetSize] == sentinel); int const decResult = LZ4_decompress_safe_partial(compressedBuffer, 
decodedBuffer, compressedSize, targetSize, blockSize); - FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data"); + FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); - FUZ_CHECKTEST(compressedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); } /* Test Compression with limited output size */ From 3318d573ba09cf6d509d69ee2520b4748408248f Mon Sep 17 00:00:00 2001 From: Lucas Holt Date: Sat, 8 Sep 2018 14:46:54 -0400 Subject: [PATCH 39/45] Add support for MidnightBSD --- Makefile | 2 +- lib/Makefile | 2 +- programs/Makefile | 2 +- programs/platform.h | 2 +- tests/Makefile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 5776b1610..69a34b773 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) HOST_OS = POSIX .PHONY: install uninstall diff --git a/lib/Makefile b/lib/Makefile index 6b37839b8..88d9b4f28 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -124,7 +124,7 @@ clean: 
#----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) .PHONY: listL120 listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) diff --git a/programs/Makefile b/programs/Makefile index 98366ad34..bd33d9be0 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -109,7 +109,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) unlz4: lz4 ln -s lz4$(EXT) unlz4$(EXT) diff --git a/programs/platform.h b/programs/platform.h index db2efac88..c0b384020 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -81,7 +81,7 @@ extern "C" { #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ || defined(__midipix__) || defined(__VMS)) # if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \ - || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MidnightBSD__) 
/* BSD distros */ # define PLATFORM_POSIX_VERSION 200112L # else # if defined(__linux__) || defined(__linux) diff --git a/tests/Makefile b/tests/Makefile index 16f2867d7..3de111b8d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -133,7 +133,7 @@ checkTag: checkTag.c $(LZ4DIR)/lz4.h #----------------------------------------------------------------------------- # validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) MD5:=md5sum ifneq (,$(filter $(shell uname), Darwin )) From 72d120bc2fabfae626d588bc4ef86eec1f932d8a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 10 Sep 2018 12:08:51 -0700 Subject: [PATCH 40/45] switched to circle 2.0 using automated config-translation --- .circleci/config.yml | 108 +++++++++++++++++++++++++++++++++++++++++++ circle.yml | 39 ---------------- 2 files changed, 108 insertions(+), 39 deletions(-) create mode 100644 .circleci/config.yml delete mode 100644 circle.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..4c08cb2f4 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,108 @@ +# This configuration was automatically generated from a CircleCI 1.0 config. +# It should include any build commands you had along with commands that CircleCI +# inferred from your project structure. We strongly recommend you read all the +# comments in this file to understand the structure of CircleCI 2.0, as the idiom +# for configuration has changed substantially in 2.0 to allow arbitrary jobs rather +# than the prescribed lifecycle of 1.0. 
In general, we recommend using this generated +# configuration as a reference rather than using it in production, though in most +# cases it should duplicate the execution of your original 1.0 config. +version: 2 +jobs: + build: + working_directory: ~/lz4/lz4 + parallelism: 1 + shell: /bin/bash --login + # CircleCI 2.0 does not support environment variables that refer to each other the same way as 1.0 did. + # If any of these refer to each other, rewrite them so that they don't or see https://circleci.com/docs/2.0/env-vars/#interpolating-environment-variables-to-set-other-environment-variables . + environment: + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts + CIRCLE_TEST_REPORTS: /tmp/circleci-test-results + # In CircleCI 1.0 we used a pre-configured image with a large number of languages and other packages. + # In CircleCI 2.0 you can now specify your own image, or use one of our pre-configured images. + # The following configuration line tells CircleCI to use the specified docker image as the runtime environment for you job. + # We have selected a pre-built image that mirrors the build environment we use on + # the 1.0 platform, but we recommend you choose an image more tailored to the needs + # of each job. For more information on choosing an image (or alternatively using a + # VM instead of a container) see https://circleci.com/docs/2.0/executor-types/ + # To see the list of pre-built images that CircleCI provides for most common languages see + # https://circleci.com/docs/2.0/circleci-images/ + docker: + - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37 + command: /sbin/init + steps: + # Machine Setup + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out. 
+ - checkout + # Prepare for artifact and test results collection equivalent to how it was done on 1.0. + # In many cases you can simplify this from what is generated here. + # 'See docs on artifact collection here https://circleci.com/docs/2.0/artifacts/' + - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS + # Dependencies + # This would typically go in either a build or a build-and-test job when using workflows + # Restore the dependency cache + - restore_cache: + keys: + # This branch if available + - v1-dep-{{ .Branch }}- + # Default branch if not + - v1-dep-dev- + # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly + - v1-dep- + # This is based on your 1.0 configuration file or project settings + - run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update + - run: sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu + - run: sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross + - run: sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind + # Save dependency cache + - save_cache: + key: v1-dep-{{ .Branch }}-{{ epoch }} + paths: + # This is a broad list of cache paths to include many possible development environments + # You can probably delete some of these entries + - vendor/bundle + - ~/virtualenvs + - ~/.m2 + - ~/.ivy2 + - ~/.bundle + - ~/.go_workspace + - ~/.gradle + - ~/.cache/bower + # Test + # This would typically be a build job when using workflows, possibly combined with build + # This is based on your 1.0 configuration file or project settings + - run: clang -v; make clangtest && make clean + - run: g++ -v; make gpptest && make clean + - run: gcc -v; make c_standards && make clean + - run: gcc -v; g++ -v; make ctocpptest && make clean + - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean + 
- run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: gcc-6 -v; CC=gcc-6 make c_standards && make clean + - run: gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean + - run: make cmake && make clean + - run: make -C tests test-lz4 + - run: make -C tests test-lz4c + - run: make -C tests test-frametest + - run: make -C tests test-fullbench + - run: make -C tests test-fuzzer && make clean + - run: make -C lib all && make clean + - run: pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean + - run: make travis-install && make clean + - run: gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: make usan && make clean + - run: clang -v; make staticAnalyze && make clean + - run: make -C tests test-mem && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean + - run: make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean + - run: make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean + # Teardown + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # Save test results + - store_test_results: + path: /tmp/circleci-test-results + # Save artifacts + - store_artifacts: + path: /tmp/circleci-artifacts + - store_artifacts: + path: /tmp/circleci-test-results diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 1602e4977..000000000 --- a/circle.yml +++ /dev/null @@ -1,39 +0,0 @@ -dependencies: - override: - - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update - - sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu - - sudo apt-get -y 
install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross - - sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind - -test: - override: - # Tests compilers and C standards - - clang -v; make clangtest && make clean - - g++ -v; make gpptest && make clean - - gcc -v; make c_standards && make clean - - gcc -v; g++ -v; make ctocpptest && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - gcc-6 -v; CC=gcc-6 make c_standards && make clean - - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean -# Shorter tests - - make cmake && make clean - - make -C tests test-lz4 - - make -C tests test-lz4c - - make -C tests test-frametest - - make -C tests test-fullbench - - make -C tests test-fuzzer && make clean - - make -C lib all && make clean - - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean - - make travis-install && make clean - # Longer tests - - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - make usan && make clean - - clang -v; make staticAnalyze && make clean - # Valgrind tests - - make -C tests test-mem && make clean - # ARM, AArch64, PowerPC, PowerPC64 tests - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean - - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean - - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean From 32272f9866eb6d46e4457190cc7562ade2aa2ca8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 10 Sep 2018 15:51:53 -0700 Subject: [PATCH 41/45] removed temporary debug traces --- lib/lz4.c | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index dbda4f14d..c68a7daa7 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1583,14 +1583,12 @@ LZ4_decompress_generic( /* specific : partial decode : does not respect end parsing restrictions */ assert(op<=oend); if (partialDecoding && (cpy > oend-12)) { - DEBUGLOG(2, "match copy close to the end"); size_t const mlen = MIN(length, (size_t)(oend-op)); const BYTE* const matchEnd = match + mlen; BYTE* const copyEnd = op + mlen; if (matchEnd > op) { /* overlap copy */ while (op < copyEnd) *op++ = *match++; } else { - DEBUGLOG(2, "let's memcopy %zu bytes (non overlapping)", mlen); memcpy(op, match, mlen); } op = copyEnd; From 63fc6fbf7ee9e6a587598af085f22742cd9e6798 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 10 Sep 2018 16:22:16 -0700 Subject: [PATCH 42/45] restored nullifying output to counter possible (offset==0) --- lib/lz4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/lz4.c b/lib/lz4.c index c68a7daa7..0545bbc5a 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1536,7 +1536,11 @@ LZ4_decompress_generic( _copy_match: if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - // LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ /* note : no longer valid with partialDecoding, since there is no guarantee that at least 4 bytes are available */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + LZ4_write32(op, 0); /* silence an msan warning when offset==0; costs <1%; */ + } /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */ if (length == ML_MASK) { unsigned s; From b87a8e9e623d6e2404ae9f948a4e0ee8f1415bee Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 10 Sep 2018 16:48:41 -0700 Subject: [PATCH 43/45] fixed minor warning in fuzzer.c added a few more comments and assert() --- lib/lz4.c | 14 
++++++++------ lib/lz4.h | 8 ++++---- tests/fuzzer.c | 1 - 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 0545bbc5a..133501db8 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1,6 +1,6 @@ /* LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2017, Yann Collet. + Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -1405,9 +1405,9 @@ LZ4_decompress_generic( int srcSize, int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ - endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ - earlyEnd_directive partialDecoding, /* full, partial */ - int dict, /* noDict, withPrefix64k, usingExtDict */ + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ const BYTE* const dictStart, /* only if dict==usingExtDict */ const size_t dictSize /* note : = 0 if noDict */ @@ -1434,6 +1434,7 @@ LZ4_decompress_generic( DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); /* Special cases */ + assert(lowPrefix <= op); assert(src != NULL); if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 
1 : -1); @@ -1504,6 +1505,7 @@ LZ4_decompress_generic( /* copy literals */ cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { @@ -1523,7 +1525,7 @@ LZ4_decompress_generic( } } else { - LZ4_wildCopy(op, ip, cpy); + LZ4_wildCopy(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ ip += length; op = cpy; } @@ -1584,7 +1586,7 @@ LZ4_decompress_generic( /* copy match within block */ cpy = op + length; - /* specific : partial decode : does not respect end parsing restrictions */ + /* partialDecoding : may not respect endBlock parsing restrictions */ assert(op<=oend); if (partialDecoding && (cpy > oend-12)) { size_t const mlen = MIN(length, (size_t)(oend-op)); diff --git a/lib/lz4.h b/lib/lz4.h index c8ee206ac..059ef7c1b 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -1,7 +1,7 @@ /* * LZ4 - Fast LZ compression algorithm * Header File - * Copyright (C) 2011-2017, Yann Collet. + * Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -46,7 +46,7 @@ extern "C" { /** Introduction - LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core, + LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. @@ -62,8 +62,8 @@ extern "C" { An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md), take care of encoding standard metadata alongside LZ4-compressed blocks. - If your application requires interoperability, it's recommended to use it. - A library is provided to take care of it, see lz4frame.h. + Frame format is required for interoperability. + It is delivered through a companion API, declared in lz4frame.h. 
*/ /*^*************************************************************** diff --git a/tests/fuzzer.c b/tests/fuzzer.c index d6a5f511e..b29e82e4e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -583,7 +583,6 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c { size_t const missingBytes = FUZ_rand(&randState) % blockSize; int const targetSize = (int)(blockSize - missingBytes); char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; - assert(decodedBuffer[targetSize] == sentinel); int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize); FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); From 6d32240b2e9cb921f9b34b790d787d0ee1ea51cb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 10 Sep 2018 17:36:40 -0700 Subject: [PATCH 44/45] clarify constant MFLIMIT and separate it from MATCH_SAFEGUARD_DISTANCE. While both constants have same value, they do not serve same purpose, hence should not be confused. 
--- lib/lz4.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 133501db8..4046102e6 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -297,8 +297,9 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) #define MINMATCH 4 #define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) @@ -1588,7 +1589,7 @@ LZ4_decompress_generic( /* partialDecoding : may not respect endBlock parsing restrictions */ assert(op<=oend); - if (partialDecoding && (cpy > oend-12)) { + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { size_t const mlen = MIN(length, (size_t)(oend-op)); const BYTE* const matchEnd = match + mlen; BYTE* const copyEnd = op + mlen; @@ -1616,7 +1617,7 @@ LZ4_decompress_generic( } op += 8; - if (unlikely(cpy > oend-12)) { + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { From 08d347b5b217b011ff7487130b79480d8cfdaeb8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 11 Sep 2018 10:15:31 -0700 Subject: [PATCH 45/45] updated NEWS for v1.8.3 release --- NEWS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 8ee3c92d4..13a9a1c2d 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,9 @@ v1.8.3 -fix : data corruption for files > 64KB at level 9 under specific conditions (#560) +perf: minor decompression speed improvement (~+2%) with gcc +fix : corruption in v1.8.2 at 
level 9 for files > 64KB under rare conditions (#560) cli : new command --fast, by @jennifermliu -build : added Haiku target, by @fbrosson +api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566) +build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1 doc : updated documentation regarding dictionary compression v1.8.2