diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..4c08cb2f4 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,108 @@ +# This configuration was automatically generated from a CircleCI 1.0 config. +# It should include any build commands you had along with commands that CircleCI +# inferred from your project structure. We strongly recommend you read all the +# comments in this file to understand the structure of CircleCI 2.0, as the idiom +# for configuration has changed substantially in 2.0 to allow arbitrary jobs rather +# than the prescribed lifecycle of 1.0. In general, we recommend using this generated +# configuration as a reference rather than using it in production, though in most +# cases it should duplicate the execution of your original 1.0 config. +version: 2 +jobs: + build: + working_directory: ~/lz4/lz4 + parallelism: 1 + shell: /bin/bash --login + # CircleCI 2.0 does not support environment variables that refer to each other the same way as 1.0 did. + # If any of these refer to each other, rewrite them so that they don't or see https://circleci.com/docs/2.0/env-vars/#interpolating-environment-variables-to-set-other-environment-variables . + environment: + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts + CIRCLE_TEST_REPORTS: /tmp/circleci-test-results + # In CircleCI 1.0 we used a pre-configured image with a large number of languages and other packages. + # In CircleCI 2.0 you can now specify your own image, or use one of our pre-configured images. + # The following configuration line tells CircleCI to use the specified docker image as the runtime environment for you job. + # We have selected a pre-built image that mirrors the build environment we use on + # the 1.0 platform, but we recommend you choose an image more tailored to the needs + # of each job. For more information on choosing an image (or alternatively using a + # VM instead of a container) see https://circleci.com/docs/2.0/executor-types/ + # To see the list of pre-built images that CircleCI provides for most common languages see + # https://circleci.com/docs/2.0/circleci-images/ + docker: + - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37 + command: /sbin/init + steps: + # Machine Setup + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out. + - checkout + # Prepare for artifact and test results collection equivalent to how it was done on 1.0. + # In many cases you can simplify this from what is generated here. 
+ # 'See docs on artifact collection here https://circleci.com/docs/2.0/artifacts/' + - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS + # Dependencies + # This would typically go in either a build or a build-and-test job when using workflows + # Restore the dependency cache + - restore_cache: + keys: + # This branch if available + - v1-dep-{{ .Branch }}- + # Default branch if not + - v1-dep-dev- + # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly + - v1-dep- + # This is based on your 1.0 configuration file or project settings + - run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update + - run: sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu + - run: sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross + - run: sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind + # Save dependency cache + - save_cache: + key: v1-dep-{{ .Branch }}-{{ epoch }} + paths: + # This is a broad list of cache paths to include many possible development environments + # You can probably delete some of these entries + - vendor/bundle + - ~/virtualenvs + - ~/.m2 + - ~/.ivy2 + - ~/.bundle + - ~/.go_workspace + - ~/.gradle + - ~/.cache/bower + # Test + # This would typically be a build job when using workflows, possibly combined with build + # This is based on your 1.0 configuration file or project settings + - run: clang -v; make clangtest && make clean + - run: g++ -v; make gpptest && make clean + - run: gcc -v; make c_standards && make clean + - run: gcc -v; g++ -v; make ctocpptest && make clean + - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean + - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: gcc-6 -v; CC=gcc-6 make c_standards && make clean + - run: gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean + - run: make cmake && make clean + - run: make -C tests test-lz4 + - run: make -C tests test-lz4c + - run: make -C tests test-frametest + - run: make -C tests test-fullbench + - run: make -C tests test-fuzzer && make clean + - run: make -C lib all && make clean + - run: pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean + - run: make travis-install && make clean + - run: gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: make usan && make clean + - run: clang -v; make staticAnalyze && make clean + - run: make -C tests test-mem && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean + - run: make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean + - run: make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean + # Teardown + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # Save test results + - store_test_results: + path: /tmp/circleci-test-results + # Save artifacts + - store_artifacts: + path: /tmp/circleci-artifacts + - store_artifacts: + path: /tmp/circleci-test-results diff --git a/.travis.yml b/.travis.yml index 0a876f925..de6875be7 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -49,6 +49,10 @@ matrix: packages: - valgrind + - env: Ubu=14.04 Cmd='make ctocpptest' COMPILER=cc + dist: trusty + sudo: false + - env: Ubu=14.04 Cmd='make -C tests test-lz4c32 test-fullbench32 versionsTest' COMPILER=cc dist: trusty sudo: required diff --git a/Makefile b/Makefile index 86613fd71..69a34b773 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) HOST_OS = POSIX .PHONY: install uninstall @@ -172,6 +172,14 @@ gpptest gpptest32: clean CC=$(CC) $(MAKE) -C $(PRGDIR) all CFLAGS="$(CFLAGS)" CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)" +ctocpptest: LIBCC="$(CC)" +ctocpptest: TESTCC="$(CXX)" +ctocpptest: CFLAGS="" +ctocpptest: clean + CC=$(LIBCC) $(MAKE) -C $(LZ4DIR) CFLAGS="$(CFLAGS)" all + CC=$(LIBCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" lz4.o lz4hc.o lz4frame.o + CC=$(TESTCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" all + c_standards: clean CFLAGS="-std=c90 -Werror" $(MAKE) clean allmost CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost diff --git a/NEWS b/NEWS index 0139e6123..13a9a1c2d 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +v1.8.3 +perf: minor decompression speed improvement (~+2%) with gcc +fix : corruption in v1.8.2 at level 9 for files > 64KB under rare conditions (#560) +cli : new command --fast, by @jennifermliu +api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566) +build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1 +doc : updated documentation regarding dictionary compression + v1.8.2 perf: *much* faster dictionary compression on small files, by @felixhandte perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv) diff --git a/README.md b/README.md index 406792a11..e64020d1c 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,23 @@ LZ4 - Extremely fast compression ================================ LZ4 is lossless compression algorithm, -providing compression speed at 400 MB/s per core, +providing compression speed > 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. Speed can be tuned dynamically, selecting an "acceleration" factor -which trades compression ratio for more speed up. +which trades compression ratio for faster speed. On the other end, a high compression derivative, LZ4_HC, is also provided, trading CPU time for improved compression ratio. All versions feature the same decompression speed. +LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression), +and can ingest any input file as dictionary, +including those created by [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder). +(note: only the final 64KB are used). + LZ4 library is provided as open-source software using BSD 2-Clause license. @@ -67,8 +72,8 @@ in single-thread mode. 
[zlib]: http://www.zlib.net/ [Zstandard]: http://www.zstd.net/ -LZ4 is also compatible and well optimized for x32 mode, -for which it provides some additional speed performance. +LZ4 is also compatible and optimized for x32 mode, +for which it provides additional speed performance. Installation @@ -76,7 +81,7 @@ Installation ``` make -make install # this command may require root access +make install # this command may require root permissions ``` LZ4's `Makefile` supports standard [Makefile conventions], @@ -94,10 +99,10 @@ Documentation The raw LZ4 block compression format is detailed within [lz4_Block_format]. -To compress an arbitrarily long file or data stream, multiple blocks are required. -Organizing these blocks and providing a common header format to handle their content -is the purpose of the Frame format, defined into [lz4_Frame_format]. -Interoperable versions of LZ4 must respect this frame format. +Arbitrarily long files or data streams are compressed using multiple blocks, +for streaming requirements. These blocks are organized into a frame, +defined into [lz4_Frame_format]. +Interoperable versions of LZ4 must also respect the frame format. [lz4_Block_format]: doc/lz4_Block_format.md [lz4_Frame_format]: doc/lz4_Frame_format.md diff --git a/circle.yml b/circle.yml deleted file mode 100644 index fa3759069..000000000 --- a/circle.yml +++ /dev/null @@ -1,38 +0,0 @@ -dependencies: - override: - - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update - - sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu - - sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross - - sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind - -test: - override: - # Tests compilers and C standards - - clang -v; make clangtest && make clean - - g++ -v; make gpptest && make clean - - gcc -v; make c_standards && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - gcc-6 -v; CC=gcc-6 make c_standards && make clean - - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean -# Shorter tests - - make cmake && make clean - - make -C tests test-lz4 - - make -C tests test-lz4c - - make -C tests test-frametest - - make -C tests test-fullbench - - make -C tests test-fuzzer && make clean - - make -C lib all && make clean - - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean - - make travis-install && make clean - # Longer tests - - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - make usan && make clean - - clang -v; make staticAnalyze && make clean - # Valgrind tests - - make -C tests test-mem && make clean - # ARM, AArch64, PowerPC, PowerPC64 tests - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean - - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean - - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean diff --git a/contrib/cmake_unofficial/CMakeLists.txt b/contrib/cmake_unofficial/CMakeLists.txt index 27c3a7881..b09c4fb0e 100644 --- a/contrib/cmake_unofficial/CMakeLists.txt +++ 
b/contrib/cmake_unofficial/CMakeLists.txt @@ -12,6 +12,8 @@ set(LZ4_TOP_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") +option(LZ4_BUILD_LEGACY_LZ4C "Build lz4c progam with legacy argument support" ON) + # Parse version information file(STRINGS "${LZ4_TOP_SOURCE_DIR}/lib/lz4.h" LZ4_VERSION_MAJOR REGEX "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$") string(REGEX REPLACE "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$" "\\1" LZ4_VERSION_MAJOR "${LZ4_VERSION_MAJOR}") @@ -122,14 +124,18 @@ else() endif() # lz4 +set(LZ4_PROGRAMS_BUILT lz4cli) add_executable(lz4cli ${LZ4_CLI_SOURCES}) set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4) target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY}) # lz4c -add_executable(lz4c ${LZ4_CLI_SOURCES}) -set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") -target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +if (LZ4_BUILD_LEGACY_LZ4C) + list(APPEND LZ4_PROGRAMS_BUILT lz4c) + add_executable(lz4c ${LZ4_CLI_SOURCES}) + set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") + target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +endif() # Extra warning flags include (CheckCCompilerFlag) @@ -165,7 +171,7 @@ endforeach (flag) if(NOT LZ4_BUNDLED_MODE) include(GNUInstallDirs) - install(TARGETS lz4cli lz4c + install(TARGETS ${LZ4_PROGRAMS_BUILT} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS ${LZ4_LIBRARIES_BUILT} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index e5044fe70..6ebf8d281 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -1,10 +1,10 @@
-Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. + Then, provide this buffer as 'void* state' to the compression function.
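As an aside, a minimal sketch of how the extState contract documented above could be exercised; the payload, buffer sizes, and acceleration value are invented for illustration, and error handling is kept to the bare minimum:

```
/* Minimal sketch : one-shot compression with an externally allocated state.
 * The input string and acceleration value are illustrative only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    const char src[] = "externally allocated state example, externally allocated state example";
    int const srcSize = (int)strlen(src);
    int const dstCapacity = LZ4_compressBound(srcSize);
    char* const dst = (char*)malloc((size_t)dstCapacity);
    void* const state = malloc((size_t)LZ4_sizeofState());   /* malloc() returns suitably aligned memory */
    if (dst == NULL || state == NULL) return 1;

    {   int const cSize = LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
        if (cSize <= 0) { printf("compression failed\n"); return 1; }
        printf("compressed %i bytes into %i bytes\n", srcSize, cSize);
    }
    free(state);
    free(dst);
    return 0;
}
```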
int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); -Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. - This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +
Reverse the logic : compresses as much data as possible from 'src' buffer + into already allocated buffer 'dst', of size >= 'targetDestSize'. + This function either compresses the entire 'src' content into 'dst' if it's large enough, + or fills 'dst' buffer completely with as much data as possible from 'src'. + note: acceleration parameter is fixed to "default". + + *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'. + New value is necessarily <= input value. + @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + or 0 if compression fails.
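For illustration, a small sketch of the destSize usage described above; the 4 KB input and the 256-byte output budget are arbitrary choices, not values taken from this patch:

```
/* Minimal sketch : fill a fixed-size output buffer with as much compressed
 * data as fits, using LZ4_compress_destSize(). Sizes are illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    char src[4096];
    char dst[256];                      /* hard budget for compressed output */
    int srcSize = (int)sizeof(src);     /* in : bytes available ; out : bytes consumed */
    memset(src, 'A', sizeof(src));      /* highly compressible payload */

    {   int const cSize = LZ4_compress_destSize(src, dst, &srcSize, (int)sizeof(dst));
        if (cSize == 0) { printf("compression failed\n"); return 1; }
        printf("consumed %i bytes of input, wrote %i bytes (<= %i)\n",
               srcSize, cSize, (int)sizeof(dst));
    }
    return 0;
}
```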
int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. - return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. +
This function used to be a bit faster than LZ4_decompress_safe(), + though the situation has changed in recent versions, + and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + As a consequence, this function is no longer recommended, and may be deprecated in future versions. + Its only remaining specificity is that it can decompress data without knowing its compressed size. + + originalSize : is the uncompressed size to regenerate. + `dst` must be already allocated, its size must be >= 'originalSize' bytes. + @return : number of bytes read from source buffer (== compressed size). + If the source stream is detected malformed, the function stops decoding and returns a negative result. + note : This function requires uncompressed originalSize to be known in advance. + The function never writes past the output buffer. + However, since it doesn't know its 'src' size, it may read past the intended input. + Also, because match offsets are not validated during decoding, + reads from 'src' may underflow. + Use this function in trusted environment **only**. +
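Since the note above steers users away from LZ4_decompress_fast(), here is a minimal round-trip sketch built on the recommended LZ4_decompress_safe(); the sample string and buffer sizes are illustrative only:

```
/* Minimal sketch : round-trip using LZ4_decompress_safe(), the recommended
 * decoder when the compressed size is known. Buffers are illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    const char src[] = "prefer LZ4_decompress_safe, prefer LZ4_decompress_safe";
    int const srcSize = (int)sizeof(src);          /* include terminating null for simplicity */
    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];
    char regenerated[sizeof(src)];
    int const cSize = LZ4_compress_default(src, compressed, srcSize, (int)sizeof(compressed));
    if (cSize <= 0) return 1;

    /* dSize is the number of regenerated bytes, or a negative value on malformed input */
    {   int const dSize = LZ4_decompress_safe(compressed, regenerated, cSize, (int)sizeof(regenerated));
        if (dSize < 0 || dSize != srcSize) { printf("decompression error\n"); return 1; }
        printf("round-trip OK : %s\n", regenerated);
    }
    return 0;
}
```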
int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); -This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. +
Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + into destination buffer 'dst' of size 'dstCapacity'. + Up to 'targetOutputSize' bytes will be decoded. + The function stops decoding on reaching this objective, + which can boost performance when only the beginning of a block is required. + + @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + If source stream is detected malformed, function returns a negative result. + + Note : @return can be < targetOutputSize, if compressed block contains less data. + + Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + and expects targetOutputSize <= dstCapacity. + It effectively stops decoding on reaching targetOutputSize, + so dstCapacity is kind of redundant. + This is because in a previous version of this function, + decoding operation would not "break" a sequence in the middle. + As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + it could write more bytes, though only up to dstCapacity. + Some "margin" used to be required for this operation to work properly. + This is no longer necessary. + The function nonetheless keeps its signature, in an effort to not break API. +
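A short sketch of the partial-decoding behaviour documented above, assuming an arbitrary 1 KB payload and a 64-byte target; with this patch (v1.8.3), the call is expected to decode exactly the requested number of bytes:

```
/* Minimal sketch : decode only the first bytes of a block with
 * LZ4_decompress_safe_partial(). The 64-byte target is illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    char src[1024];
    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];
    char partial[64];                              /* only the beginning is needed */
    memset(src, 'B', sizeof(src));

    {   int const cSize = LZ4_compress_default(src, compressed, (int)sizeof(src), (int)sizeof(compressed));
        if (cSize <= 0) return 1;
        /* request at most sizeof(partial) bytes; targetOutputSize <= dstCapacity as required */
        {   int const dSize = LZ4_decompress_safe_partial(compressed, partial, cSize,
                                                          (int)sizeof(partial), (int)sizeof(partial));
            if (dSize < 0) { printf("malformed input\n"); return 1; }
            printf("decoded %i bytes out of a %i-byte block\n", dSize, (int)sizeof(src));
    }   }
    return 0;
}
```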
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);An LZ4_streamDecode_t context can be allocated once and re-used multiple times. Use this function to start decompression of a new stream of blocks. - A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + A dictionary can optionally be set. Use NULL or size 0 for a reset order. Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. @return : 1 if OK, 0 if error diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index 53ea7eb19..fb8e0ceb3 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -1,10 +1,10 @@
-1.8.2 Manual +1.8.3 Manual -1.8.2 Manual
+1.8.3 Manual
Contents
diff --git a/examples/frameCompress.c b/examples/frameCompress.c index 9bfea483f..a0c5d3d80 100644 --- a/examples/frameCompress.c +++ b/examples/frameCompress.c @@ -31,12 +31,13 @@ static const LZ4F_preferences_t kPrefs = { static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f) { size_t const writtenSize = fwrite(buf, eltSize, nbElt, f); - size_t const expectedSize = eltSize * nbElt; /* note : should check for overflow */ + size_t const expectedSize = eltSize * nbElt; + assert(expectedSize / nbElt == eltSize); /* check overflow */ if (writtenSize < expectedSize) { if (ferror(f)) /* note : ferror() must follow fwrite */ - printf("Write failed\n"); + fprintf(stderr, "Write failed \n"); else - printf("Short write\n"); + fprintf(stderr, "Short write \n"); exit(1); } } @@ -54,9 +55,9 @@ typedef struct { static compressResult_t compress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_compressionContext_t ctx, - void* inBuff, size_t inChunkSize, - void* outBuff, size_t outCapacity) + LZ4F_compressionContext_t ctx, + void* inBuff, size_t inChunkSize, + void* outBuff, size_t outCapacity) { compressResult_t result = { 1, 0, 0 }; /* result for an error */ unsigned long long count_in = 0, count_out; @@ -167,9 +168,9 @@ static size_t get_block_size(const LZ4F_frameInfo_t* info) { /* @return : 1==error, 0==success */ static int decompress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_dctx* dctx, - void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, - void* dst, size_t dstCapacity) + LZ4F_dctx* dctx, + void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, + void* dst, size_t dstCapacity) { int firstChunk = 1; size_t ret = 1; @@ -194,7 +195,7 @@ decompress_file_internal(FILE* f_in, FILE* f_out, * Continue while there is more input to read (srcPtr != srcEnd) * and the frame isn't over (ret != 0) */ - while (srcPtr != srcEnd && ret != 0) { + while (srcPtr < srcEnd && ret != 0) { /* Any data within dst has been flushed at this stage */ size_t dstSize = dstCapacity; size_t srcSize = srcEnd - srcPtr; @@ -208,9 +209,20 @@ decompress_file_internal(FILE* f_in, FILE* f_out, /* Update input */ srcPtr += srcSize; } + + assert(srcPtr <= srcEnd); + + /* Ensure all input data has been consumed. + * It is valid to have multiple frames in the same file, + * but this example only supports one frame. + */ + if (srcPtr < srcEnd) { + printf("Decompress: Trailing data left in file after frame\n"); + return 1; + } } - /* Check that there isn't trailing input data after the frame. + /* Check that there isn't trailing data in the file after the frame. * It is valid to have multiple frames in the same file, * but this example only supports one frame. 
*/ @@ -260,7 +272,7 @@ decompress_file_allocDst(FILE* f_in, FILE* f_out, int const decompressionResult = decompress_file_internal( f_in, f_out, dctx, - src, srcCapacity, readSize, consumedSize, + src, srcCapacity, readSize-consumedSize, consumedSize, dst, dstCapacity); free(dst); @@ -278,7 +290,7 @@ static int decompress_file(FILE* f_in, FILE* f_out) if (!src) { perror("decompress_file(src)"); return 1; } LZ4F_dctx* dctx; - { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, 100); + { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); if (LZ4F_isError(dctxStatus)) { printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus)); } } diff --git a/lib/Makefile b/lib/Makefile index abb6c075a..88d9b4f28 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,7 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT)) BUILD_SHARED:=yes BUILD_STATIC:=yes +OS ?= $(shell uname) CPPFLAGS+= -DXXH_NAMESPACE=LZ4_ CFLAGS ?= -O3 DEBUGFLAGS:= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ @@ -58,7 +59,7 @@ SRCFILES := $(sort $(wildcard *.c)) # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -ifeq ($(shell uname), Darwin) +ifeq ($(OS), Darwin) SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) @@ -123,7 +124,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) .PHONY: listL120 listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) @@ -142,14 +143,14 @@ libdir ?= $(LIBDIR) INCLUDEDIR ?= $(prefix)/include includedir ?= $(INCLUDEDIR) -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly)) +ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly)) PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig else PKGCONFIGDIR ?= $(libdir)/pkgconfig endif pkgconfigdir ?= $(PKGCONFIGDIR) -ifneq (,$(filter $(shell uname),SunOS)) +ifneq (,$(filter $(OS),SunOS)) INSTALL ?= ginstall else INSTALL ?= install diff --git a/lib/lz4.c b/lib/lz4.c index e51a3e0a4..4046102e6 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1,6 +1,6 @@ /* LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2017, Yann Collet. + Copyright (C) 2011-present, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -297,8 +297,9 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) #define MINMATCH 4 #define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) @@ -483,9 +484,6 @@ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - /*-************************************ * Local Utils @@ -496,6 +494,21 @@ int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } int LZ4_sizeofState() { return LZ4_STREAMSIZE; } +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + /*-****************************** * Compression functions ********************************/ @@ -669,9 +682,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = dictDirective == usingDictCtx ? - dictionary + dictSize - dictCtx->currentOffset : - dictionary + dictSize - startIndex; + const BYTE* dictBase = (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; @@ -699,7 +712,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( cctx->dictSize += (U32)inputSize; } cctx->currentOffset += (U32)inputSize; - cctx->tableType = tableType; + cctx->tableType = (U16)tableType; if (inputSize
oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ + assert(lowPrefix <= op); + assert(src != NULL); if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1); if ((endOnInput) && unlikely(srcSize==0)) return -1; /* Main Loop : decode sequences */ @@ -1428,7 +1447,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( size_t offset; unsigned const token = *ip++; - size_t length = token >> ML_BITS; /* literal length */ + size_t length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ @@ -1453,6 +1472,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( length = token & ML_MASK; /* match length */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; + assert(match <= op); /* check overflow */ /* Do not deal with overlapping matches. */ if ( (length != ML_MASK) @@ -1486,11 +1506,12 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op+length; - if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { if (partialDecoding) { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if (cpy > oend) { cpy = oend; length = oend-op; } /* Partial decoding : stop in the middle of literal segment */ if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ } else { if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ @@ -1499,10 +1520,15 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; - break; /* Necessarily EOF, due to parsing restrictions */ + if (!partialDecoding || (cpy == oend)) { + /* Necessarily EOF, due to parsing restrictions */ + break; + } + + } else { + LZ4_wildCopy(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; /* get offset */ offset = LZ4_readLE16(ip); ip+=2; @@ -1513,7 +1539,11 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( _copy_match: if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + LZ4_write32(op, 0); /* silence an msan warning when offset==0; costs <1%; */ + } /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */ if (length == ML_MASK) { unsigned s; @@ -1526,21 +1556,24 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( } length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, 
(size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } if (length <= (size_t)(lowPrefix-match)) { - /* match can be copied as a single segment from external dictionary */ + /* match fits entirely within external dictionary : just copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { - /* match encompass external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix-match); + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */ + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; @@ -1553,6 +1586,23 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* copy match within block */ cpy = op + length; + + /* partialDecoding : may not respect endBlock parsing restrictions */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) *op++ = *match++; + } else { + memcpy(op, match, mlen); + } + op = copyEnd; + if (op==oend) break; + continue; + } + if (unlikely(offset<8)) { op[0] = match[0]; op[1] = match[1]; @@ -1561,23 +1611,26 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( match += inc32table[offset]; memcpy(op+4, match, 4); match -= dec64table[offset]; - } else { memcpy(op, match, 8); match+=8; } + } else { + memcpy(op, match, 8); + match += 8; + } op += 8; - if (unlikely(cpy>oend-12)) { - BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1); + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { LZ4_wildCopy(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } - while (op 16) LZ4_wildCopy(op+8, match+8, cpy); + if (length > 16) LZ4_wildCopy(op+8, match+8, cpy); } - op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ @@ -1598,23 +1651,24 @@ LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE -int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, partial, targetOutputSize, - noDict, (BYTE*)dest, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return 
LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, + endOnOutputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1624,7 +1678,7 @@ LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, withPrefix64k, + endOnInputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1641,17 +1695,17 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i size_t prefixSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */ -#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict -int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize) +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1660,7 +1714,7 @@ static int LZ4_decompress_fast_extDict(const char* source, char* dest, int origi const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1673,7 +1727,7 @@ int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compresse size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } @@ -1682,7 +1736,7 @@ int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalS size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } @@ -1773,8 +1827,8 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch /* The buffer wraps around, or they're switching to another buffer. 
*/ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; lz4sd->prefixEnd = (BYTE*)dest + result; @@ -1834,7 +1888,7 @@ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressed return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) diff --git a/lib/lz4.h b/lib/lz4.h index 7d1312219..059ef7c1b 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -1,7 +1,7 @@ /* * LZ4 - Fast LZ compression algorithm * Header File - * Copyright (C) 2011-2017, Yann Collet. + * Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -46,7 +46,7 @@ extern "C" { /** Introduction - LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core, + LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. @@ -62,8 +62,8 @@ extern "C" { An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md), take care of encoding standard metadata alongside LZ4-compressed blocks. - If your application requires interoperability, it's recommended to use it. - A library is provided to take care of it, see lz4frame.h. + Frame format is required for interoperability. + It is delivered through a companion API, declared in lz4frame.h. */ /*^*************************************************************** @@ -93,7 +93,7 @@ extern "C" { /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 8 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) @@ -183,55 +183,72 @@ LZ4_compress_fast_extState() : Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. + Then, provide this buffer as 'void* state' to compression function. */ LZ4LIB_API int LZ4_sizeofState(void); LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); -/*! -LZ4_compress_destSize() : - Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. 
- This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. */ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); -/*! -LZ4_decompress_fast() : **unsafe!** -This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. - return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. -*/ +/*! LZ4_decompress_fast() : **unsafe!** + * This function used to be a bit faster than LZ4_decompress_safe(), + * though situation has changed in recent versions, + * and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + * Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + * As a consequence, this function is no longer recommended, and may be deprecated in future versions. + * It's only remaining specificity is that it can decompress data without knowing its compressed size. + * + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : This function requires uncompressed originalSize to be known in advance. + * The function never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read past the intended input. + * Also, because match offsets are not validated during decoding, + * reads from 'src' may underflow. 
+ * Use this function in trusted environment **only**. + */ LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -/*! -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. -*/ +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective, + * which can boost performance when only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + * If source stream is detected malformed, function returns a negative result. + * + * Note : @return can be < targetOutputSize, if compressed block contains less data. + * + * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + * and expects targetOutputSize <= dstCapacity. + * It effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in a previous version of this function, + * decoding operation would not "break" a sequence in the middle. + * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * This is no longer necessary. + * The function nonetheless keeps its signature, in an effort to not break API. + */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); @@ -266,16 +283,23 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. * - * Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory! - * - * Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. - * Make sure that buffers are separated by at least one byte. - * This way, each block only depends on previous block. - * Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. - * * @return : size of compressed block * or 0 if there is an error (typically, cannot fit into 'dst'). - * After an error, the stream status is invalid, it can only be reset or freed. + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. 
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is invalid, it can only be reset or freed. */ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); @@ -305,7 +329,7 @@ LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str /*! LZ4_setStreamDecode() : * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. - * A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ diff --git a/lib/lz4frame.c b/lib/lz4frame.c index e1d0b1d02..08bf0faee 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -738,7 +738,7 @@ static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize, static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? -level + 1 : 1; LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent); if (cdict) { return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); @@ -749,7 +749,7 @@ static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? -level + 1 : 1; (void)cdict; /* init once at beginning of frame */ return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } diff --git a/lib/lz4frame.h b/lib/lz4frame.h index fb434ff76..75f1fd91b 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -33,9 +33,10 @@ */ /* LZ4F is a stand-alone API to create LZ4-compressed frames - * conformant with specification v1.5.1. + * conformant with specification v1.6.1. * It also offers streaming capabilities. - * lz4.h is not required when using lz4frame.h. + * lz4.h is not required when using lz4frame.h, + * except to get constant such as LZ4_VERSION_NUMBER. * */ #ifndef LZ4F_H_09782039843 @@ -159,8 +160,9 @@ typedef LZ4F_contentChecksum_t contentChecksum_t; /*! LZ4F_frameInfo_t : * makes it possible to set or read frame parameters. - * It's not required to set all fields, as long as the structure was initially memset() to zero. - * For all fields, 0 sets it to default value */ + * Structure must be first init to 0, using memset() or LZ4F_INIT_FRAMEINFO, + * setting all parameters to default. 
+ * It's then possible to update selectively some parameters */ typedef struct { LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB; 0 == default */ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */ @@ -171,24 +173,30 @@ typedef struct { LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */ } LZ4F_frameInfo_t; +#define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } /* v1.8.3+ */ + /*! LZ4F_preferences_t : - * makes it possible to supply detailed compression parameters to the stream interface. - * Structure is presumed initially memset() to zero, representing default settings. + * makes it possible to supply advanced compression instructions to streaming interface. + * Structure must be first init to 0, using memset() or LZ4F_INIT_PREFERENCES, + * setting all parameters to default. * All reserved fields must be set to zero. */ typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */ - unsigned autoFlush; /* 1: always flush, to reduce usage of internal buffers */ - unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */ /* >= v1.8.2 */ + unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */ + unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */ unsigned reserved[3]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; -LZ4FLIB_API int LZ4F_compressionLevel_max(void); +#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } /* v1.8.3+ */ /*-********************************* * Simple compression function ***********************************/ + +LZ4FLIB_API int LZ4F_compressionLevel_max(void); + /*! LZ4F_compressFrameBound() : * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences. * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences. @@ -222,8 +230,9 @@ typedef struct { /*--- Resource Management ---*/ -#define LZ4F_VERSION 100 +#define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */ LZ4FLIB_API unsigned LZ4F_getVersion(void); + /*! LZ4F_createCompressionContext() : * The first thing to do is to create a compressionContext object, which will be used in all compression operations. * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version. 
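To show the new LZ4F_INIT_PREFERENCES initializer in context, a minimal one-shot frame-compression sketch; the payload size is illustrative, and the one-shot LZ4F_compressFrame() path is used so no compression context is needed:

```
/* Minimal sketch : one-shot frame compression, initializing preferences with
 * the LZ4F_INIT_PREFERENCES macro added by this change (v1.8.3+). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4frame.h"

int main(void)
{
    char src[16 * 1024];
    memset(src, 'C', sizeof(src));

    {   LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;   /* every field at its default */
        size_t const dstCapacity = LZ4F_compressFrameBound(sizeof(src), &prefs);
        void* const dst = malloc(dstCapacity);
        if (dst == NULL) return 1;
        {   size_t const cSize = LZ4F_compressFrame(dst, dstCapacity, src, sizeof(src), &prefs);
            if (LZ4F_isError(cSize)) {
                printf("frame compression error : %s\n", LZ4F_getErrorName(cSize));
                free(dst);
                return 1;
            }
            printf("compressed %u bytes into %u bytes\n",
                   (unsigned)sizeof(src), (unsigned)cSize);
        }
        free(dst);
    }
    return 0;
}
```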
diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 8108ea011..e913ee7b3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -327,6 +327,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { + assert(base + matchIndex < ip); + if (ip - (base+matchIndex) > MAX_DISTANCE) break; assert(maxML < 2 GB); longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ @@ -450,6 +452,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( *op += length; /* Encode Offset */ + assert( (*ip - match) <= MAX_DISTANCE ); /* note : consider providing offset as a value, rather than as a pointer difference */ LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ diff --git a/lib/lz4hc.h b/lib/lz4hc.h index bb5e07373..970fa3966 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -246,6 +246,10 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API int LZ4_resetStr #ifndef LZ4_HC_SLO_098092834 #define LZ4_HC_SLO_098092834 +#if defined (__cplusplus) +extern "C" { +#endif + /*! LZ4_compress_HC_destSize() : v1.8.0 (experimental) * Will try to compress as much data from `src` as possible * that can fit into `targetDstSize` budget. @@ -343,5 +347,9 @@ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* ds */ LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream); +#if defined (__cplusplus) +} +#endif + #endif /* LZ4_HC_SLO_098092834 */ #endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/programs/Makefile b/programs/Makefile index 72bdcaac1..bd33d9be0 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -82,7 +82,7 @@ lz4-release: DEBUGFLAGS= lz4-release: lz4 lz4c: lz4 - ln -s lz4 lz4c + ln -s lz4$(EXT) lz4c$(EXT) lz4c32: CFLAGS += -m32 lz4c32 : $(SRCFILES) @@ -102,20 +102,20 @@ preview-man: clean-man man clean: @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) @$(RM) core *.o *.test tmp* \ - lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4 lz4cat + lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4$(EXT) lz4cat$(EXT) @echo Cleaning completed #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) unlz4: lz4 - ln -s lz4 unlz4 + ln -s lz4$(EXT) unlz4$(EXT) lz4cat: lz4 - ln -s lz4 lz4cat + ln -s lz4$(EXT) lz4cat$(EXT) DESTDIR ?= # directory variables : GNU conventions prefer lowercase @@ -147,10 +147,10 @@ INSTALL_DATA ?= $(INSTALL) -m 644 install: lz4 @echo Installing binaries @$(INSTALL) -d -m 755 $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/ - @$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(bindir)/lz4 - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4c - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4cat - @ln -sf lz4 $(DESTDIR)$(bindir)/unlz4 + @$(INSTALL_PROGRAM) lz4$(EXT) $(DESTDIR)$(bindir)/lz4$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4c$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/unlz4$(EXT) @echo Installing man pages @$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1 @ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4c.1 @@ -159,10 +159,10 @@ install: lz4 @echo lz4 
installation completed uninstall: - @$(RM) $(DESTDIR)$(bindir)/lz4cat - @$(RM) $(DESTDIR)$(bindir)/unlz4 - @$(RM) $(DESTDIR)$(bindir)/lz4 - @$(RM) $(DESTDIR)$(bindir)/lz4c + @$(RM) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @$(RM) $(DESTDIR)$(bindir)/unlz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4c$(EXT) @$(RM) $(DESTDIR)$(man1dir)/lz4.1 @$(RM) $(DESTDIR)$(man1dir)/lz4c.1 @$(RM) $(DESTDIR)$(man1dir)/lz4cat.1 diff --git a/programs/bench.c b/programs/bench.c index 770191cfa..11bf0440c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -49,7 +49,10 @@ #include "lz4.h" #define COMPRESSOR0 LZ4_compress_local -static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); } +static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { + int const acceleration = (clevel < 0) ? -clevel + 1 : 1; + return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); +} #include "lz4hc.h" #define COMPRESSOR1 LZ4_compress_HC #define DEFAULTCOMPRESSOR COMPRESSOR0 @@ -326,7 +329,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); if (crcOrig!=crcCheck) { size_t u; - DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u %.2f%%\n", - filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100); /* avoid division by zero */ + filesize, compressedfilesize, + (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100); return 0; } @@ -645,21 +652,25 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel) { - clock_t const start = clock(); + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); cRess_t const ress = LZ4IO_createCResources(); - int const issueWithSrcFile = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); + int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); /* Free resources */ LZ4IO_freeCResources(ress); /* Final Status */ - { clock_t const end = clock(); - double const seconds = (double)(end - start) / CLOCKS_PER_SEC; - DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = (double)timeLength_ns / 1000000000; + DISPLAYLEVEL(4, "Completed in %.2f sec (cpu load : %.0f%%)\n", + timeLength_s, (cpuLoad_s / timeLength_s) * 100); } - return issueWithSrcFile; + return result; } diff --git a/programs/platform.h b/programs/platform.h index db2efac88..c0b384020 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -81,7 +81,7 @@ extern "C" { #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ || defined(__midipix__) || defined(__VMS)) # if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \ - || defined(__DragonFly__) || 
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MidnightBSD__) /* BSD distros */ # define PLATFORM_POSIX_VERSION 200112L # else # if defined(__linux__) || defined(__linux) diff --git a/tests/.gitignore b/tests/.gitignore index 36dff4207..9aa42a064 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,5 +1,5 @@ -# test build artefacts +# build artefacts datagen frametest frametest32 @@ -8,8 +8,12 @@ fullbench32 fuzzer fuzzer32 fasttest +roundTripTest checkTag # test artefacts tmp* versionsTest + +# local tests +afl diff --git a/tests/Makefile b/tests/Makefile index d2385614a..3de111b8d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -63,7 +63,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest datagen +all: fullbench fuzzer frametest roundTripTest datagen all32: CFLAGS+=-m32 all32: all @@ -103,6 +103,9 @@ fuzzer : lz4.o lz4hc.o xxhash.o fuzzer.c frametest: lz4frame.o lz4.o lz4hc.o xxhash.o frametest.c $(CC) $(FLAGS) $^ -o $@$(EXT) +roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) @@ -114,7 +117,8 @@ clean: fullbench$(EXT) fullbench32$(EXT) \ fuzzer$(EXT) fuzzer32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ - fasttest$(EXT) datagen$(EXT) checkTag$(EXT) + fasttest$(EXT) roundTripTest$(EXT) \ + datagen$(EXT) checkTag$(EXT) @rm -fR $(TESTDIR) @echo Cleaning completed @@ -129,7 +133,7 @@ checkTag: checkTag.c $(LZ4DIR)/lz4.h #----------------------------------------------------------------------------- # validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) MD5:=md5sum ifneq (,$(filter $(shell uname), Darwin )) @@ -262,8 +266,17 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum + # ./datagen -g20KB generates the same file every single time + # cannot save output of ./datagen -g20KB as input file to lz4 because the following shell commands are run before ./datagen -g20KB + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9 + test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 + ! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0 + ! 
$(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1 @$(RM) tmp-tlb* + + test-lz4-dict: lz4 datagen @echo "\n ---- test lz4 compression/decompression with dictionary ----" ./datagen -g16KB > tmp-dict diff --git a/tests/fullbench.c b/tests/fullbench.c index c06e2301d..fd1202df1 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -220,8 +220,16 @@ static int local_LZ4_compress_fast_continue0(const char* in, char* out, int inSi } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + /* declare hidden function */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); +extern int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +#if defined (__cplusplus) +} +#endif static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize) { @@ -289,8 +297,16 @@ static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int in } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); +#if defined (__cplusplus) +} +#endif + static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize) { (void)inSize; @@ -301,7 +317,9 @@ static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) { - return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + int result = LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + if (result < 0) return result; + return outSize; } @@ -446,9 +464,9 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break; case 14: compressionFunction = local_LZ4_compress_HC_continue; initFunction = local_LZ4_resetStreamHC; compressorName = "LZ4_compress_HC_continue"; break; #ifndef LZ4_DLL_IMPORT - case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; + case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; #endif - case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; + case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; chunkP[0].origSize = (int)benchedSize; nbChunks=1; break; case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; @@ -526,6 +544,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) const char* dName; int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; + int checkResult = 1; if ((g_decompressionAlgo != ALL_DECOMPRESSORS) && (g_decompressionAlgo != dAlgNb)) continue; @@ -537,11 +556,11 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break; case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break; case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break; - case 7: decompressionFunction = 
local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break; + case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; checkResult = 0; break; #ifndef LZ4_DLL_IMPORT - case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; + case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; #endif - case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; + case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL); if (LZ4F_isError(errorCode)) { DISPLAY("Error while preparing compressed frame\n"); @@ -573,9 +592,13 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) clockTime = clock(); while(BMK_GetClockSpan(clockTime) < TIMELOOP) { for (chunkNb=0; chunkNb %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 5dd75b389..b29e82e4e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -38,7 +38,7 @@ /*-************************************ * Dependencies **************************************/ -#ifdef __unix__ /* must be included before platform.h for MAP_ANONYMOUS */ +#if defined(__unix__) && !defined(_AIX) /* must be included before platform.h for MAP_ANONYMOUS */ # include /* mmap */ #endif #include "platform.h" /* _CRT_SECURE_NO_WARNINGS */ @@ -48,6 +48,10 @@ #include /* strcmp */ #include /* clock_t, clock, CLOCKS_PER_SEC */ #include +#if defined(__unix__) && defined(_AIX) +# include /* mmap */ +#endif + #define LZ4_STATIC_LINKING_ONLY #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" @@ -319,12 +323,17 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c int result = 0; unsigned cycleNb; -# define FUZ_CHECKTEST(cond, ...) if (cond) { printf("Test %u : ", testNb); printf(__VA_ARGS__); \ - printf(" (seed %u, cycle %u) \n", seed, cycleNb); goto _output_error; } +# define FUZ_CHECKTEST(cond, ...) \ + if (cond) { \ + printf("Test %u : ", testNb); printf(__VA_ARGS__); \ + printf(" (seed %u, cycle %u) \n", seed, cycleNb); \ + goto _output_error; \ + } + # define FUZ_DISPLAYTEST(...) { \ testNb++; \ if (g_displayLevel>=4) { \ - printf("\r%4u - %2u ", cycleNb, testNb); \ + printf("\r%4u - %2u :", cycleNb, testNb); \ printf(" " __VA_ARGS__); \ printf(" "); \ fflush(stdout); \ @@ -495,7 +504,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c /* Test decoding with empty input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input"); - LZ4_decompress_safe(NULL, decodedBuffer, 0, blockSize); + LZ4_decompress_safe(compressedBuffer, decodedBuffer, 0, blockSize); /* Test decoding with a one byte input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input"); @@ -536,7 +545,6 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space"); FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data"); - //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than (unknown) target size"); // well, is that an issue ? 
FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size"); { U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0); FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); @@ -570,15 +578,16 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); - // Test partial decoding with target output size being max/2 => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); - - // Test partial decoding with target output size being just below max => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); + /* Test partial decoding => must work */ + FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); + { size_t const missingBytes = FUZ_rand(&randState) % blockSize; + int const targetSize = (int)(blockSize - missingBytes); + char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; + int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize); + FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); + FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); + } /* Test Compression with limited output size */ @@ -801,7 +810,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)"); FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer"); { U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2; if ((U32)blockSize > missingBytes) { decodedBuffer[blockSize-missingBytes] = 0; @@ -811,7 +820,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c } } /* Compress HC using External dictionary */ - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary"); dict -= (FUZ_rand(&randState) & 7); /* even bigger separation */ if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; LZ4_resetStreamHC (&LZ4dictHC, compressionLevel); diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c new file mode 100644 index 000000000..2d344518e --- /dev/null +++ b/tests/roundTripTest.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This program takes a file in input, + * performs an LZ4 round-trip test (compress + decompress) + * compares the result with original + * and generates an abort() on corruption detection, + * in order for afl to register the event as a crash. +*/ + + +/*=========================================== +* Tuning Constant +*==========================================*/ +#ifndef MIN_CLEVEL +# define MIN_CLEVEL (int)(-5) +#endif + + + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include /* strcmp */ +#include +#include /* stat */ +#include /* stat */ +#include "xxhash.h" + +#include "lz4.h" +#include "lz4hc.h" + + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MSG(...) fprintf(stderr, __VA_ARGS__) + +#define CONTROL_MSG(c, ...) { \ + if ((c)) { \ + MSG(__VA_ARGS__); \ + MSG(" \n"); \ + abort(); \ + } \ +} + + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* const ip1 = (const char*)buff1; + const char* const ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos = LZ4_compressBound(srcSize)` + * for compression to be guaranteed to work */ +static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcSize, + int clevel) +{ + int const proposed_clevel = clevel ? clevel : select_clevel(srcBuff, srcSize); + int const selected_clevel = proposed_clevel < 0 ? -proposed_clevel : proposed_clevel; /* if level < 0, it becomes an accelearion value */ + compressFn compress = selected_clevel >= LZ4HC_CLEVEL_MIN ? LZ4_compress_HC : LZ4_compress_fast; + int const cSize = compress((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, selected_clevel); + CONTROL_MSG(cSize == 0, "Compression error !"); + + { int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity); + CONTROL_MSG(dSize < 0, "Decompression detected an error !"); + CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !"); + } + + /* check potential content corruption error */ + assert(resultBuffCapacity >= srcSize); + { size_t const errorPos = checkBuffers(srcBuff, resultBuff, srcSize); + CONTROL_MSG(errorPos != srcSize, + "Silent decoding corruption, at pos %u !!!", + (unsigned)errorPos); + } + +} + +static void roundTripCheck(const void* srcBuff, size_t srcSize, int clevel) +{ + size_t const cBuffSize = LZ4_compressBound((int)srcSize); + void* const cBuff = malloc(cBuffSize); + void* const rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + exit(1); + } + + roundTripTest(rBuff, cBuffSize, + cBuff, cBuffSize, + srcBuff, srcSize, + clevel); + + free(rBuff); + free(cBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : + * requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + MSG("Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + MSG("Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + MSG("Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName, int clevel) +{ + size_t const fileSize = getFileSize(fileName); + void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); + if (!buffer) { + MSG("not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize, clevel); + free (buffer); +} + + +int bad_usage(const char* exeName) +{ + MSG(" \n"); + MSG("bad usage: \n"); + MSG(" \n"); + MSG("%s [Options] fileName \n", exeName); + MSG(" \n"); + MSG("Options: \n"); + MSG("-# : use #=[0-9] compression level (default:0 == random) \n"); + return 1; +} + + +int main(int argCount, const char** argv) +{ + const char* const exeName = argv[0]; + int argNb = 1; + int clevel = 0; + + assert(argCount >= 1); + if (argCount < 2) return bad_usage(exeName); + + if (argv[1][0] == '-') { + clevel = argv[1][1] - '0'; + argNb = 2; + } + + if (argNb >= argCount) return bad_usage(exeName); + + fileCheck(argv[argNb], clevel); + MSG("no pb detected \n"); + return 0; +}
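programs/bench.c above now maps negative benchmark levels onto LZ4's acceleration parameter, and tests/roundTripTest.c dispatches between LZ4_compress_fast() and LZ4_compress_HC() depending on the selected level. A compact, self-contained sketch combining both ideas (the helper name is illustrative, not part of the library API):

    #include "lz4.h"
    #include "lz4hc.h"

    /* Level dispatch :
     *  - level >= LZ4HC_CLEVEL_MIN     : high-compression path
     *  - 0 <= level < LZ4HC_CLEVEL_MIN : fast compressor, default acceleration
     *  - level < 0                     : fast compressor, acceleration == -level + 1
     *    (same mapping as LZ4_compress_local() in programs/bench.c) */
    int compressWithLevel(const char* src, char* dst,
                          int srcSize, int dstCapacity, int level)
    {
        if (level >= LZ4HC_CLEVEL_MIN)
            return LZ4_compress_HC(src, dst, srcSize, dstCapacity, level);
        {   int const acceleration = (level < 0) ? -level + 1 : 1;
            return LZ4_compress_fast(src, dst, srcSize, dstCapacity, acceleration);
        }
    }

For example, compressWithLevel(src, dst, srcSize, dstCapacity, 9) takes the HC path, while level -3 takes the fast path with acceleration 4.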
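The reworked test in tests/fuzzer.c above pins down what LZ4_decompress_safe_partial() is expected to do on valid input: regenerate exactly the requested number of bytes and leave the rest of the output buffer untouched. A stand-alone sketch exercising the same three assertions (function and sample names are illustrative):

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>
    #include "lz4.h"

    /* Round-trips srcSize bytes, then decodes only the first targetSize bytes,
     * checking the same conditions as the new fuzzer test :
     * no error, exactly targetSize bytes regenerated, no write past targetSize. */
    static void partialDecodeCheck(const char* src, int srcSize, int targetSize)
    {
        int const cCapacity = LZ4_compressBound(srcSize);
        char* const cBuff = (char*)malloc((size_t)cCapacity);
        char* const dBuff = (char*)malloc((size_t)srcSize + 1);
        assert(cBuff != NULL && dBuff != NULL);
        assert(0 < targetSize && targetSize < srcSize);

        {   int const cSize = LZ4_compress_default(src, cBuff, srcSize, cCapacity);
            char const sentinel = (char)(src[targetSize] ^ 0x5A);
            assert(cSize > 0);
            dBuff[targetSize] = sentinel;   /* must remain untouched */

            {   int const dSize = LZ4_decompress_safe_partial(cBuff, dBuff,
                                                cSize, targetSize, srcSize);
                assert(dSize == targetSize);                       /* exactly the requested amount */
                assert(memcmp(src, dBuff, (size_t)targetSize) == 0);
                assert(dBuff[targetSize] == sentinel);             /* no overwrite beyond targetSize */
        }   }

        free(dBuff);
        free(cBuff);
    }

    int main(void)
    {
        const char sample[] = "abcabcabcabcabcabcabcabc-0123456789-abcabcabcabcabc";
        partialDecodeCheck(sample, (int)sizeof(sample), (int)sizeof(sample) / 2);
        return 0;
    }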
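The LZ4IO_compressFilename() change in programs/lz4io.c above reports CPU load as the ratio of CPU time (clock()) to wall-clock time. The same measurement can be reproduced outside the lz4 code base with standard interfaces; the sketch below assumes a POSIX system and uses clock_gettime() in place of the project's UTIL_* helpers, with a dummy loop standing in for the measured work:

    #include <stdio.h>
    #include <time.h>   /* clock, CLOCKS_PER_SEC, clock_gettime (POSIX.1-2001) */

    int main(void)
    {
        struct timespec wallStart, wallEnd;
        clock_t const cpuStart = clock();
        clock_gettime(CLOCK_MONOTONIC, &wallStart);

        {   /* placeholder for the work being measured */
            volatile unsigned long acc = 0;
            unsigned long i;
            for (i = 0; i < 50000000UL; i++) acc += i;
        }

        clock_gettime(CLOCK_MONOTONIC, &wallEnd);
        {   clock_t const cpuEnd = clock();
            double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
            double const wall_s = (double)(wallEnd.tv_sec - wallStart.tv_sec)
                                + (double)(wallEnd.tv_nsec - wallStart.tv_nsec) / 1000000000;
            printf("Completed in %.2f sec (cpu load : %.0f%%) \n",
                   wall_s, (cpuLoad_s / wall_s) * 100);
        }
        return 0;
    }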