Skip to content
This repository has been archived by the owner on Feb 5, 2025. It is now read-only.

Commit

Permalink
Merge pull request lz4#531 from lz4/dev
Browse files Browse the repository at this point in the history
Preparing v1.8.2
  • Loading branch information
Cyan4973 authored May 7, 2018
2 parents dfed9fa + bf6fd93 commit b3692db
Show file tree
Hide file tree
Showing 50 changed files with 5,037 additions and 1,985 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ bin/
# Mac
.DS_Store
*.dSYM

# Windows / Msys
nul
ld.exe*
16 changes: 12 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ matrix:
env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fullbench' COMPILER=cc

- os: linux
sudo: false
env: Ubu=12.04cont Cmd='make -C tests test-frametest test-fuzzer' COMPILER=cc
sudo: required
env: Ubu=12.04cont Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest test-fuzzer' COMPILER=cc

- os: linux
sudo: false
env: Ubu=12.04cont Cmd="make gpptest && make clean examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
env: Ubu=12.04cont Cmd="make gpptest && make clean && make examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc


# 14.04 LTS Server Edition 64 bit
Expand Down Expand Up @@ -59,7 +59,7 @@ matrix:
- libc6-dev-i386
- gcc-multilib

- env: Ubu=14.04 Cmd='make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
- env: Ubu=14.04 Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
dist: trusty
sudo: required
addons:
Expand Down Expand Up @@ -145,7 +145,15 @@ matrix:
- gcc-multilib
- gcc-4.4

# tag-specific test
- if: tag =~ ^v[0-9]\.[0-9]
os: linux
sudo: false
env: Cmd="make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH " COMPILER=cc


script:
- uname -a
- echo Cmd=$Cmd
- $COMPILER -v
- sh -c "$Cmd"
39 changes: 20 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# ################################################################
# LZ4 - Makefile
# Copyright (C) Yann Collet 2011-2016
# Copyright (C) Yann Collet 2011-present
# All rights reserved.
#
# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
#
# BSD license
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -58,6 +56,7 @@ all: allmost manuals
allmost: lib lz4 examples

.PHONY: lib lib-release liblz4.a
lib: liblz4.a
lib lib-release liblz4.a:
@$(MAKE) -C $(LZ4DIR) $@

Expand All @@ -69,8 +68,8 @@ lz4 lz4-release :
@cp $(PRGDIR)/lz4$(EXT) .

.PHONY: examples
examples: lib lz4
$(MAKE) -C $(EXDIR) test
examples: liblz4.a
$(MAKE) -C $(EXDIR) all

.PHONY: manuals
manuals:
Expand Down Expand Up @@ -122,9 +121,14 @@ ifneq (,$(filter $(HOST_OS),MSYS POSIX))
list:
@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs

.PHONY: check
check:
$(MAKE) -C $(TESTDIR) test-lz4-essentials

.PHONY: test
test:
$(MAKE) -C $(TESTDIR) $@
$(MAKE) -C $(EXDIR) $@

clangtest: clean
clang -v
Expand All @@ -139,10 +143,10 @@ clangtest-native: clean
@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang

usan: clean
CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1

usan32: clean
CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1

staticAnalyze: clean
CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
Expand All @@ -159,20 +163,17 @@ platformTest: clean
versionsTest: clean
$(MAKE) -C $(TESTDIR) $@

gpptest: clean
g++ -v
CC=g++ $(MAKE) -C $(LZ4DIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C $(PRGDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C $(TESTDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"

gpptest32: clean
g++ -v
CC=g++ $(MAKE) -C $(LZ4DIR) all CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C $(PRGDIR) native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C $(TESTDIR) native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
gpptest gpptest32: CC = "$(CXX) -Wno-deprecated"
gpptest gpptest32: CFLAGS = -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
gpptest32: CFLAGS += -m32
gpptest gpptest32: clean
$(CXX) -v
CC=$(CC) $(MAKE) -C $(LZ4DIR) all CFLAGS="$(CFLAGS)"
CC=$(CC) $(MAKE) -C $(PRGDIR) all CFLAGS="$(CFLAGS)"
CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)"

c_standards: clean
# note : lz4 is not C90 compatible, because it requires long long support
CFLAGS="-std=c90 -Werror" $(MAKE) clean allmost
CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost
CFLAGS="-std=c99 -Werror" $(MAKE) clean allmost
CFLAGS="-std=gnu99 -Werror" $(MAKE) clean allmost
Expand Down
12 changes: 12 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
v1.8.2
perf: *much* faster dictionary compression on small files, by @felixhandte
perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv)
perf: slightly faster HC compression and decompression speed
perf: very small compression ratio improvement
fix : compression compatible with low memory addresses (< 0xFFFF)
fix : decompression segfault when provided with NULL input, by @terrelln
cli : new command --favor-decSpeed
cli : benchmark mode more accurate for small inputs
fullbench : can bench _destSize() variants, by @felixhandte
doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv)

v1.8.1
perf : faster and stronger ultra modes (levels 10+)
perf : slightly faster compression and decompression speed
Expand Down
33 changes: 16 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,32 @@ Benchmarks
-------------------------

The benchmark uses [lzbench], from @inikep
compiled with GCC v6.2.0 on Linux 64-bits.
The reference system uses a Core i7-3930K CPU @ 4.5GHz.
compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
The reference system uses a Core i7-6700K CPU @ 4.0GHz.
Benchmark evaluates the compression of reference [Silesia Corpus]
in single-thread mode.

[lzbench]: https://github.com/inikep/lzbench
[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia

| Compressor | Ratio | Compression | Decompression |
| ---------- | ----- | ----------- | ------------- |
| memcpy | 1.000 | 7300 MB/s | 7300 MB/s |
|**LZ4 fast 8 (v1.7.3)**| 1.799 |**911 MB/s** | **3360 MB/s** |
|**LZ4 default (v1.7.3)**|**2.101**|**625 MB/s** | **3220 MB/s** |
| LZO 2.09 | 2.108 | 620 MB/s | 845 MB/s |
| QuickLZ 1.5.0 | 2.238 | 510 MB/s | 600 MB/s |
| Snappy 1.1.3 | 2.091 | 450 MB/s | 1550 MB/s |
| LZF v3.6 | 2.073 | 365 MB/s | 820 MB/s |
| [Zstandard] 1.1.1 -1 | 2.876 | 330 MB/s | 930 MB/s |
| [Zstandard] 1.1.1 -3 | 3.164 | 200 MB/s | 810 MB/s |
| [zlib] deflate 1.2.8 -1| 2.730 | 100 MB/s | 370 MB/s |
|**LZ4 HC -9 (v1.7.3)** |**2.720**| 34 MB/s | **3240 MB/s** |
| [zlib] deflate 1.2.8 -6| 3.099 | 33 MB/s | 390 MB/s |
| Compressor | Ratio | Compression | Decompression |
| ---------- | ----- | ----------- | ------------- |
| memcpy | 1.000 |13100 MB/s | 13100 MB/s |
|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
| LZO 2.09 | 2.108 | 630 MB/s | 800 MB/s |
| QuickLZ 1.5.0 | 2.238 | 530 MB/s | 720 MB/s |
| Snappy 1.1.4 | 2.091 | 525 MB/s | 1750 MB/s |
| [Zstandard] 1.3.4 -1 | 2.877 | 470 MB/s | 1380 MB/s |
| LZF v3.6 | 2.073 | 380 MB/s | 840 MB/s |
| [zlib] deflate 1.2.11 -1| 2.730 | 100 MB/s | 380 MB/s |
|**LZ4 HC -9 (v1.8.2)** |**2.721**| 40 MB/s | **3920 MB/s** |
| [zlib] deflate 1.2.11 -6| 3.099 | 34 MB/s | 410 MB/s |

[zlib]: http://www.zlib.net/
[Zstandard]: http://www.zstd.net/

LZ4 is also compatible and well optimized for x32 mode, for which it provides an additional +10% speed performance.
LZ4 is also compatible and well optimized for x32 mode,
for which it provides some additional speed performance.


Installation
Expand Down
14 changes: 7 additions & 7 deletions circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ test:
- clang -v; make clangtest && make clean
- g++ -v; make gpptest && make clean
- gcc -v; make c_standards && make clean
- gcc-5 -v; make -C tests test-lz4 CC=gcc-5 MOREFLAGS=-Werror && make clean
- gcc-5 -v; make -C tests test-lz4c32 CC=gcc-5 MOREFLAGS="-I/usr/include/x86_64-linux-gnu -Werror" && make clean
- gcc-6 -v; make c_standards CC=gcc-6 && make clean
- gcc-6 -v; make -C tests test-lz4 CC=gcc-6 MOREFLAGS=-Werror && make clean
- gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean
- gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
- gcc-6 -v; CC=gcc-6 make c_standards && make clean
- gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean
# Shorter tests
- make cmake && make clean
- make -C tests test-lz4
- make -C tests test-lz4c
- make -C tests test-frametest
- make -C tests test-fullbench
- make -C tests test-fuzzer && make clean
- make -C lib all && make clean
- pyenv global 3.4.4; CFLAGS=-I/usr/include/x86_64-linux-gnu make versionsTest && make clean
- make -C lib all && make clean
- pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean
- make travis-install && make clean
# Longer tests
- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
- gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
- make usan && make clean
- clang -v; make staticAnalyze && make clean
# Valgrind tests
Expand Down
8 changes: 4 additions & 4 deletions contrib/gen_manual/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@
# ################################################################


CFLAGS ?= -O3
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
CXXFLAGS ?= -O3
CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
CXXFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)

LZ4API = ../../lib/lz4.h
LZ4MANUAL = ../../doc/lz4_manual.html
Expand Down
Binary file added doc/images/usingCDict_1_8_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
63 changes: 38 additions & 25 deletions doc/lz4_Block_format.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
LZ4 Block Format Description
============================
Last revised: 2015-05-07.
Last revised: 2018-04-25.
Author : Yann Collet


Expand Down Expand Up @@ -29,8 +29,8 @@ An LZ4 compressed block is composed of sequences.
A sequence is a suite of literals (not-compressed bytes),
followed by a match copy.

Each sequence starts with a token.
The token is a one byte value, separated into two 4-bits fields.
Each sequence starts with a `token`.
The `token` is a one byte value, separated into two 4-bits fields.
Therefore each field ranges from 0 to 15.


Expand All @@ -42,46 +42,46 @@ If it is 15, then we need to add some more bytes to indicate the full length.
Each additional byte then represents a value from 0 to 255,
which is added to the previous value to produce a total length.
When the byte value is 255, another byte is output.
There can be any number of bytes following the token. There is no "size limit".
There can be any number of bytes following `token`. There is no "size limit".
(Side note : this is why a not-compressible input block is expanded by 0.4%).

Example 1 : A length of 48 will be represented as :
Example 1 : A literal length of 48 will be represented as :

- 15 : value for the 4-bits High field
- 33 : (=48-15) remaining length to reach 48

Example 2 : A length of 280 will be represented as :
Example 2 : A literal length of 280 will be represented as :

- 15 : value for the 4-bits High field
- 255 : following byte is maxed, since 280-15 >= 255
- 10 : (=280 - 15 - 255) remaining length to reach 280

Example 3 : A length of 15 will be represented as :
Example 3 : A literal length of 15 will be represented as :

- 15 : value for the 4-bits High field
- 0 : (=15-15) yes, the zero must be output

Following the token and optional length bytes, are the literals themselves.
Following `token` and optional length bytes, are the literals themselves.
They are exactly as numerous as previously decoded (length of literals).
It's possible that there are zero literals.


Following the literals is the match copy operation.

It starts by the offset.
It starts with the `offset`.
This is a 2 bytes value, in little endian format
(the 1st byte is the "low" byte, the 2nd one is the "high" byte).

The offset represents the position of the match to be copied from.
The `offset` represents the position of the match to be copied from.
1 means "current position - 1 byte".
The maximum offset value is 65535, 65536 cannot be coded.
The maximum `offset` value is 65535, 65536 cannot be coded.
Note that 0 is an invalid value, not used.

Then we need to extract the match length.
Then we need to extract the `matchlength`.
For this, we use the second token field, the low 4-bits.
Value, obviously, ranges from 0 to 15.
However here, 0 means that the copy operation will be minimal.
The minimum length of a match, called minmatch, is 4.
The minimum length of a match, called `minmatch`, is 4.
As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
Similar to literal length, on reaching the highest possible value (15),
we output additional bytes, one at a time, with values ranging from 0 to 255.
Expand All @@ -90,34 +90,47 @@ A 255 value means there is another byte to read and add.
There is no limit to the number of optional bytes that can be output this way.
(This points towards a maximum achievable compression ratio of about 250).

Decoding the matchlength reaches the end of current sequence.
Decoding the `matchlength` reaches the end of current sequence.
Next byte will be the start of another sequence.
But before moving to next sequence,
it's time to use the decoded match position and length.
The decoder copies matchlength bytes from match position to current position.
The decoder copies `matchlength` bytes from match position to current position.

In some cases, matchlength is larger than offset.
Therefore, match pos + match length > current pos,
In some cases, `matchlength` is larger than `offset`.
Therefore, `match_pos + matchlength > current_pos`,
which means that later bytes to copy are not yet decoded.
This is called an "overlap match", and must be handled with special care.
The most common case is an offset of 1,
meaning the last byte is repeated matchlength times.
A common case is an offset of 1,
meaning the last byte is repeated `matchlength` times.


Parsing restrictions
-----------------------
There are specific parsing rules to respect in order to remain compatible
with assumptions made by the decoder :

1. The last 5 bytes are always literals
1. The last 5 bytes are always literals. In other words, the last five bytes
from the uncompressed input (or all bytes, if the input has fewer than five
bytes) must be encoded as literals on behalf of the last sequence.
The last sequence is incomplete, and stops right after the literals.
2. The last match must start at least 12 bytes before end of block.
Consequently, a block with less than 13 bytes cannot be compressed.
The last match is part of the penultimate sequence,
since the last sequence stops right after literals.
Note that, as a consequence, blocks < 13 bytes cannot be compressed.

These rules are in place to ensure that the decoder
will never read beyond the input buffer, nor write beyond the output buffer.

Note that the last sequence is also incomplete,
and stops right after literals.
can speculatively execute copy instructions
without ever reading or writing beyond provided I/O buffers.

1. To copy literals from a non-last sequence, an 8-byte copy instruction
can always be safely issued (without reading past the input),
because literals are followed by a 2-byte offset,
and last sequence is at least 1+5 bytes long.
2. Similarly, a match operation can speculatively copy up to 12 bytes
while remaining within output buffer boundaries.

Empty inputs can be represented with a zero byte,
interpreted as a token without literals and without a match.


Additional notes
Expand Down
Loading

0 comments on commit b3692db

Please sign in to comment.