Merge pull request lz4#531 from lz4/dev

Preparing v1.8.2
xamarin · May 7, 2018 · b3692db · b3692db
2 parents dfed9fa + bf6fd93
commit b3692db
Show file tree

Hide file tree

Showing 50 changed files with 5,037 additions and 1,985 deletions.
diff --git a/.gitignore b/.gitignore
@@ -29,3 +29,7 @@ bin/
 # Mac
 .DS_Store
 *.dSYM
+
+# Windows / Msys
+nul
+ld.exe*
diff --git a/.travis.yml b/.travis.yml
@@ -14,12 +14,12 @@ matrix:
       env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fullbench' COMPILER=cc
 
     - os: linux
-      sudo: false
-      env: Ubu=12.04cont Cmd='make -C tests test-frametest test-fuzzer' COMPILER=cc
+      sudo: required
+      env: Ubu=12.04cont Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest test-fuzzer' COMPILER=cc
 
     - os: linux
       sudo: false
-      env: Ubu=12.04cont Cmd="make gpptest && make clean examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
+      env: Ubu=12.04cont Cmd="make gpptest && make clean && make examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
 
 
     # 14.04 LTS Server Edition 64 bit
@@ -59,7 +59,7 @@ matrix:
             - libc6-dev-i386
             - gcc-multilib
 
-    - env: Ubu=14.04 Cmd='make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
+    - env: Ubu=14.04 Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
       dist: trusty
       sudo: required
       addons:
@@ -145,7 +145,15 @@ matrix:
             - gcc-multilib
             - gcc-4.4
 
+    # tag-specific test
+    - if: tag =~ ^v[0-9]\.[0-9]
+      os: linux
+      sudo: false
+      env: Cmd="make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH " COMPILER=cc
+
+
 script:
+  - uname -a
   - echo Cmd=$Cmd
   - $COMPILER -v
   - sh -c "$Cmd"
diff --git a/Makefile b/Makefile
@@ -1,10 +1,8 @@
 # ################################################################
 # LZ4 - Makefile
-# Copyright (C) Yann Collet 2011-2016
+# Copyright (C) Yann Collet 2011-present
 # All rights reserved.
 #
-# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
-#
 # BSD license
 # Redistribution and use in source and binary forms, with or without modification,
 # are permitted provided that the following conditions are met:
@@ -58,6 +56,7 @@ all: allmost manuals
 allmost: lib lz4 examples
 
 .PHONY: lib lib-release liblz4.a
+lib: liblz4.a
 lib lib-release liblz4.a:
 	@$(MAKE) -C $(LZ4DIR) $@
 
@@ -69,8 +68,8 @@ lz4 lz4-release :
 	@cp $(PRGDIR)/lz4$(EXT) .
 
 .PHONY: examples
-examples: lib lz4
-	$(MAKE) -C $(EXDIR) test
+examples: liblz4.a
+	$(MAKE) -C $(EXDIR) all
 
 .PHONY: manuals
 manuals:
@@ -122,9 +121,14 @@ ifneq (,$(filter $(HOST_OS),MSYS POSIX))
 list:
 	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
+.PHONY: check
+check:
+	$(MAKE) -C $(TESTDIR) test-lz4-essentials
+
 .PHONY: test
 test:
 	$(MAKE) -C $(TESTDIR) $@
+	$(MAKE) -C $(EXDIR) $@
 
 clangtest: clean
 	clang -v
@@ -139,10 +143,10 @@ clangtest-native: clean
 	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
 
 usan: clean
-	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
 usan32: clean
-	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
 staticAnalyze: clean
 	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
@@ -159,20 +163,17 @@ platformTest: clean
 versionsTest: clean
 	$(MAKE) -C $(TESTDIR) $@
 
-gpptest: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-
-gpptest32: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all    CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+gpptest gpptest32: CC = "$(CXX) -Wno-deprecated"
+gpptest gpptest32: CFLAGS = -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
+gpptest32: CFLAGS += -m32
+gpptest gpptest32: clean
+	$(CXX) -v
+	CC=$(CC) $(MAKE) -C $(LZ4DIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(PRGDIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)"
 
 c_standards: clean
-	# note : lz4 is not C90 compatible, because it requires long long support
+	CFLAGS="-std=c90   -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=c99   -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=gnu99 -Werror" $(MAKE) clean allmost

diff --git a/NEWS b/NEWS
@@ -1,3 +1,15 @@
+v1.8.2
+perf: *much* faster dictionary compression on small files, by @felixhandte
+perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv)
+perf: slightly faster HC compression and decompression speed
+perf: very small compression ratio improvement
+fix : compression compatible with low memory addresses (< 0xFFFF)
+fix : decompression segfault when provided with NULL input, by @terrelln
+cli : new command --favor-decSpeed
+cli : benchmark mode more accurate for small inputs
+fullbench : can bench _destSize() variants, by @felixhandte
+doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv)
+
 v1.8.1
 perf : faster and stronger ultra modes (levels 10+)
 perf : slightly faster compression and decompression speed

diff --git a/README.md b/README.md
@@ -43,33 +43,32 @@ Benchmarks
 -------------------------
 
 The benchmark uses [lzbench], from @inikep
-compiled with GCC v6.2.0 on Linux 64-bits.
-The reference system uses a Core i7-3930K CPU @ 4.5GHz.
+compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
+The reference system uses a Core i7-6700K CPU @ 4.0GHz.
 Benchmark evaluates the compression of reference [Silesia Corpus]
 in single-thread mode.
 
 [lzbench]: https://github.com/inikep/lzbench
 [Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
 
-|  Compressor            | Ratio   | Compression | Decompression |
-|  ----------            | -----   | ----------- | ------------- |
-|  memcpy                |  1.000  | 7300 MB/s   |   7300 MB/s   |
-|**LZ4 fast 8  (v1.7.3)**|  1.799  |**911 MB/s** | **3360 MB/s** |
-|**LZ4 default (v1.7.3)**|**2.101**|**625 MB/s** | **3220 MB/s** |
-|  LZO 2.09              |  2.108  |  620 MB/s   |    845 MB/s   |
-|  QuickLZ 1.5.0         |  2.238  |  510 MB/s   |    600 MB/s   |
-|  Snappy 1.1.3          |  2.091  |  450 MB/s   |   1550 MB/s   |
-|  LZF v3.6              |  2.073  |  365 MB/s   |    820 MB/s   |
-|  [Zstandard] 1.1.1 -1  |  2.876  |  330 MB/s   |    930 MB/s   |
-|  [Zstandard] 1.1.1 -3  |  3.164  |  200 MB/s   |    810 MB/s   |
-| [zlib] deflate 1.2.8 -1|  2.730  |  100 MB/s   |    370 MB/s   |
-|**LZ4 HC -9 (v1.7.3)**  |**2.720**|   34 MB/s   | **3240 MB/s** |
-| [zlib] deflate 1.2.8 -6|  3.099  |   33 MB/s   |    390 MB/s   |
+|  Compressor             | Ratio   | Compression | Decompression |
+|  ----------             | -----   | ----------- | ------------- |
+|  memcpy                 |  1.000  |13100 MB/s   |  13100 MB/s   |
+|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
+|  LZO 2.09               |  2.108  |  630 MB/s   |    800 MB/s   |
+|  QuickLZ 1.5.0          |  2.238  |  530 MB/s   |    720 MB/s   |
+|  Snappy 1.1.4           |  2.091  |  525 MB/s   |   1750 MB/s   |
+|  [Zstandard] 1.3.4 -1   |  2.877  |  470 MB/s   |   1380 MB/s   |
+|  LZF v3.6               |  2.073  |  380 MB/s   |    840 MB/s   |
+| [zlib] deflate 1.2.11 -1|  2.730  |  100 MB/s   |    380 MB/s   |
+|**LZ4 HC -9 (v1.8.2)**   |**2.721**|   40 MB/s   | **3920 MB/s** |
+| [zlib] deflate 1.2.11 -6|  3.099  |   34 MB/s   |    410 MB/s   |
 
 [zlib]: http://www.zlib.net/
 [Zstandard]: http://www.zstd.net/
 
-LZ4 is also compatible and well optimized for x32 mode, for which it provides an additional +10% speed performance.
+LZ4 is also compatible and well optimized for x32 mode,
+for which it provides some additional speed performance.
 
 
 Installation

diff --git a/circle.yml b/circle.yml
@@ -11,22 +11,22 @@ test:
     - clang -v; make clangtest && make clean
     - g++ -v; make gpptest     && make clean
     - gcc -v; make c_standards && make clean
-    - gcc-5 -v; make -C tests test-lz4 CC=gcc-5 MOREFLAGS=-Werror && make clean
-    - gcc-5 -v; make -C tests test-lz4c32 CC=gcc-5 MOREFLAGS="-I/usr/include/x86_64-linux-gnu -Werror" && make clean
-    - gcc-6 -v; make c_standards CC=gcc-6 && make clean
-    - gcc-6 -v; make -C tests test-lz4 CC=gcc-6 MOREFLAGS=-Werror && make clean
+    - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean
+    - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
+    - gcc-6 -v; CC=gcc-6 make c_standards && make clean
+    - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check  && make clean
 # Shorter tests
     - make cmake               && make clean
     - make -C tests test-lz4
     - make -C tests test-lz4c
     - make -C tests test-frametest
     - make -C tests test-fullbench
     - make -C tests test-fuzzer && make clean
-    - make -C lib all && make clean
-    - pyenv global 3.4.4; CFLAGS=-I/usr/include/x86_64-linux-gnu make versionsTest && make clean
+    - make -C lib all          && make clean
+    - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean
     - make travis-install      && make clean
   # Longer tests
-    - gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
+    - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
     - make usan                && make clean
     - clang -v; make staticAnalyze && make clean
   # Valgrind tests

diff --git a/contrib/gen_manual/Makefile b/contrib/gen_manual/Makefile
@@ -30,10 +30,10 @@
 # ################################################################
 
 
-CFLAGS ?= -O3
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
-CFLAGS += $(MOREFLAGS)
-FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CXXFLAGS ?= -O3
+CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
+CXXFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
 
 LZ4API = ../../lib/lz4.h
 LZ4MANUAL = ../../doc/lz4_manual.html

diff --git a/doc/images/usingCDict_1_8_2.png b/doc/images/usingCDict_1_8_2.png
diff --git a/doc/lz4_Block_format.md b/doc/lz4_Block_format.md
@@ -1,6 +1,6 @@
 LZ4 Block Format Description
 ============================
-Last revised: 2015-05-07.
+Last revised: 2018-04-25.
 Author : Yann Collet
 
 
@@ -29,8 +29,8 @@ An LZ4 compressed block is composed of sequences.
 A sequence is a suite of literals (not-compressed bytes),
 followed by a match copy.
 
-Each sequence starts with a token.
-The token is a one byte value, separated into two 4-bits fields.
+Each sequence starts with a `token`.
+The `token` is a one byte value, separated into two 4-bits fields.
 Therefore each field ranges from 0 to 15.
 
 
@@ -42,46 +42,46 @@ If it is 15, then we need to add some more bytes to indicate the full length.
 Each additional byte then represents a value from 0 to 255,
 which is added to the previous value to produce a total length.
 When the byte value is 255, another byte is output.
-There can be any number of bytes following the token. There is no "size limit".
+There can be any number of bytes following `token`. There is no "size limit".
 (Side note : this is why a not-compressible input block is expanded by 0.4%).
 
-Example 1 : A length of 48 will be represented as :
+Example 1 : A literal length of 48 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 33 : (=48-15) remaining length to reach 48
 
-Example 2 : A length of 280 will be represented as :
+Example 2 : A literal length of 280 will be represented as :
 
   - 15  : value for the 4-bits High field
   - 255 : following byte is maxed, since 280-15 >= 255
   - 10  : (=280 - 15 - 255) remaining length to reach 280
 
-Example 3 : A length of 15 will be represented as :
+Example 3 : A literal length of 15 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 0  : (=15-15) yes, the zero must be output
 
-Following the token and optional length bytes, are the literals themselves.
+Following `token` and optional length bytes, are the literals themselves.
 They are exactly as numerous as previously decoded (length of literals).
 It's possible that there are zero literals.
 
 
 Following the literals is the match copy operation.
 
-It starts by the offset.
+It starts by the `offset`.
 This is a 2 bytes value, in little endian format
 (the 1st byte is the "low" byte, the 2nd one is the "high" byte).
 
-The offset represents the position of the match to be copied from.
+The `offset` represents the position of the match to be copied from.
 1 means "current position - 1 byte".
-The maximum offset value is 65535, 65536 cannot be coded.
+The maximum `offset` value is 65535, 65536 cannot be coded.
 Note that 0 is an invalid value, not used.
 
-Then we need to extract the match length.
+Then we need to extract the `matchlength`.
 For this, we use the second token field, the low 4-bits.
 Value, obviously, ranges from 0 to 15.
 However here, 0 means that the copy operation will be minimal.
-The minimum length of a match, called minmatch, is 4.
+The minimum length of a match, called `minmatch`, is 4.
 As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
 Similar to literal length, on reaching the highest possible value (15),
 we output additional bytes, one at a time, with values ranging from 0 to 255.
@@ -90,34 +90,47 @@ A 255 value means there is another byte to read and add.
 There is no limit to the number of optional bytes that can be output this way.
 (This points towards a maximum achievable compression ratio of about 250).
 
-Decoding the matchlength reaches the end of current sequence.
+Decoding the `matchlength` reaches the end of current sequence.
 Next byte will be the start of another sequence.
 But before moving to next sequence,
 it's time to use the decoded match position and length.
-The decoder copies matchlength bytes from match position to current position.
+The decoder copies `matchlength` bytes from match position to current position.
 
-In some cases, matchlength is larger than offset.
-Therefore, match pos + match length > current pos,
+In some cases, `matchlength` is larger than `offset`.
+Therefore, `match_pos + matchlength > current_pos`,
 which means that later bytes to copy are not yet decoded.
 This is called an "overlap match", and must be handled with special care.
-The most common case is an offset of 1,
-meaning the last byte is repeated matchlength times.
+A common case is an offset of 1,
+meaning the last byte is repeated `matchlength` times.
 
 
 Parsing restrictions
 -----------------------
 There are specific parsing rules to respect in order to remain compatible
 with assumptions made by the decoder :
 
-1. The last 5 bytes are always literals
+1. The last 5 bytes are always literals.  In other words, the last five bytes
+   from the uncompressed input (or all bytes, if the input has less than five
+   bytes) must be encoded as literals on behalf of the last sequence.
+   The last sequence is incomplete, and stops right after the literals.
 2. The last match must start at least 12 bytes before end of block.
-   Consequently, a block with less than 13 bytes cannot be compressed.
+   The last match is part of the penultimate sequence,
+   since the last sequence stops right after literals.
+   Note that, as a consequence, blocks < 13 bytes cannot be compressed.
 
 These rules are in place to ensure that the decoder
-will never read beyond the input buffer, nor write beyond the output buffer.
-
-Note that the last sequence is also incomplete,
-and stops right after literals.
+can speculatively execute copy instructions
+without ever reading or writing beyond the provided I/O buffers.
+
+1. To copy literals from a non-last sequence, an 8-byte copy instruction
+   can always be safely issued (without reading past the input),
+   because literals are followed by a 2-byte offset,
+   and last sequence is at least 1+5 bytes long.
+2. Similarly, a match operation can speculatively copy up to 12 bytes
+   while remaining within output buffer boundaries.
+
+Empty inputs can be represented with a zero byte,
+interpreted as a token without literals and without a match.
 
 
 Additional notes
-Original file line number
+Diff line change
@@ Expand Up / @@ -29,3 +29,7 @@ bin/ @@
     # Mac
     .DS_Store
     *.dSYM
+    # Windows / Msys
+    nul
+    ld.exe*