diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..4c08cb2f4 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,108 @@ +# This configuration was automatically generated from a CircleCI 1.0 config. +# It should include any build commands you had along with commands that CircleCI +# inferred from your project structure. We strongly recommend you read all the +# comments in this file to understand the structure of CircleCI 2.0, as the idiom +# for configuration has changed substantially in 2.0 to allow arbitrary jobs rather +# than the prescribed lifecycle of 1.0. In general, we recommend using this generated +# configuration as a reference rather than using it in production, though in most +# cases it should duplicate the execution of your original 1.0 config. +version: 2 +jobs: + build: + working_directory: ~/lz4/lz4 + parallelism: 1 + shell: /bin/bash --login + # CircleCI 2.0 does not support environment variables that refer to each other the same way as 1.0 did. + # If any of these refer to each other, rewrite them so that they don't or see https://circleci.com/docs/2.0/env-vars/#interpolating-environment-variables-to-set-other-environment-variables . + environment: + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts + CIRCLE_TEST_REPORTS: /tmp/circleci-test-results + # In CircleCI 1.0 we used a pre-configured image with a large number of languages and other packages. + # In CircleCI 2.0 you can now specify your own image, or use one of our pre-configured images. + # The following configuration line tells CircleCI to use the specified docker image as the runtime environment for you job. + # We have selected a pre-built image that mirrors the build environment we use on + # the 1.0 platform, but we recommend you choose an image more tailored to the needs + # of each job. For more information on choosing an image (or alternatively using a + # VM instead of a container) see https://circleci.com/docs/2.0/executor-types/ + # To see the list of pre-built images that CircleCI provides for most common languages see + # https://circleci.com/docs/2.0/circleci-images/ + docker: + - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37 + command: /sbin/init + steps: + # Machine Setup + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # The following `checkout` command checks out your code to your working directory. In 1.0 we did this implicitly. In 2.0 you can choose where in the course of a job your code should be checked out. + - checkout + # Prepare for artifact and test results collection equivalent to how it was done on 1.0. + # In many cases you can simplify this from what is generated here. 
+ # 'See docs on artifact collection here https://circleci.com/docs/2.0/artifacts/' + - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS + # Dependencies + # This would typically go in either a build or a build-and-test job when using workflows + # Restore the dependency cache + - restore_cache: + keys: + # This branch if available + - v1-dep-{{ .Branch }}- + # Default branch if not + - v1-dep-dev- + # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly + - v1-dep- + # This is based on your 1.0 configuration file or project settings + - run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update + - run: sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu + - run: sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross + - run: sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind + # Save dependency cache + - save_cache: + key: v1-dep-{{ .Branch }}-{{ epoch }} + paths: + # This is a broad list of cache paths to include many possible development environments + # You can probably delete some of these entries + - vendor/bundle + - ~/virtualenvs + - ~/.m2 + - ~/.ivy2 + - ~/.bundle + - ~/.go_workspace + - ~/.gradle + - ~/.cache/bower + # Test + # This would typically be a build job when using workflows, possibly combined with build + # This is based on your 1.0 configuration file or project settings + - run: clang -v; make clangtest && make clean + - run: g++ -v; make gpptest && make clean + - run: gcc -v; make c_standards && make clean + - run: gcc -v; g++ -v; make ctocpptest && make clean + - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean + - run: gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: gcc-6 -v; CC=gcc-6 make c_standards && make clean + - run: gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean + - run: make cmake && make clean + - run: make -C tests test-lz4 + - run: make -C tests test-lz4c + - run: make -C tests test-frametest + - run: make -C tests test-fullbench + - run: make -C tests test-fuzzer && make clean + - run: make -C lib all && make clean + - run: pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean + - run: make travis-install && make clean + - run: gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean + - run: make usan && make clean + - run: clang -v; make staticAnalyze && make clean + - run: make -C tests test-mem && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean + - run: make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean + - run: make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean + - run: make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean + # Teardown + # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each + # Save test results + - store_test_results: + path: /tmp/circleci-test-results + # Save artifacts + - store_artifacts: + path: /tmp/circleci-artifacts + - store_artifacts: + path: /tmp/circleci-test-results diff --git a/.travis.yml b/.travis.yml index 0a876f925..de6875be7 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -49,6 +49,10 @@ matrix: packages: - valgrind + - env: Ubu=14.04 Cmd='make ctocpptest' COMPILER=cc + dist: trusty + sudo: false + - env: Ubu=14.04 Cmd='make -C tests test-lz4c32 test-fullbench32 versionsTest' COMPILER=cc dist: trusty sudo: required diff --git a/Makefile b/Makefile index 86613fd71..69a34b773 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) HOST_OS = POSIX .PHONY: install uninstall @@ -172,6 +172,14 @@ gpptest gpptest32: clean CC=$(CC) $(MAKE) -C $(PRGDIR) all CFLAGS="$(CFLAGS)" CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)" +ctocpptest: LIBCC="$(CC)" +ctocpptest: TESTCC="$(CXX)" +ctocpptest: CFLAGS="" +ctocpptest: clean + CC=$(LIBCC) $(MAKE) -C $(LZ4DIR) CFLAGS="$(CFLAGS)" all + CC=$(LIBCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" lz4.o lz4hc.o lz4frame.o + CC=$(TESTCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" all + c_standards: clean CFLAGS="-std=c90 -Werror" $(MAKE) clean allmost CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost diff --git a/NEWS b/NEWS index 0139e6123..13a9a1c2d 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +v1.8.3 +perf: minor decompression speed improvement (~+2%) with gcc +fix : corruption in v1.8.2 at level 9 for files > 64KB under rare conditions (#560) +cli : new command --fast, by @jennifermliu +api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566) +build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1 +doc : updated documentation regarding dictionary compression + v1.8.2 perf: *much* faster dictionary compression on small files, by @felixhandte perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv) diff --git a/README.md b/README.md index 406792a11..e64020d1c 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,23 @@ LZ4 - Extremely fast compression ================================ LZ4 is lossless compression algorithm, -providing compression speed at 400 MB/s per core, +providing compression speed > 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. Speed can be tuned dynamically, selecting an "acceleration" factor -which trades compression ratio for more speed up. +which trades compression ratio for faster speed. On the other end, a high compression derivative, LZ4_HC, is also provided, trading CPU time for improved compression ratio. All versions feature the same decompression speed. +LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression), +and can ingest any input file as dictionary, +including those created by [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder). +(note: only the final 64KB are used). + LZ4 library is provided as open-source software using BSD 2-Clause license. @@ -67,8 +72,8 @@ in single-thread mode. 
[zlib]: http://www.zlib.net/ [Zstandard]: http://www.zstd.net/ -LZ4 is also compatible and well optimized for x32 mode, -for which it provides some additional speed performance. +LZ4 is also compatible and optimized for x32 mode, +for which it provides additional speed performance. Installation @@ -76,7 +81,7 @@ Installation ``` make -make install # this command may require root access +make install # this command may require root permissions ``` LZ4's `Makefile` supports standard [Makefile conventions], @@ -94,10 +99,10 @@ Documentation The raw LZ4 block compression format is detailed within [lz4_Block_format]. -To compress an arbitrarily long file or data stream, multiple blocks are required. -Organizing these blocks and providing a common header format to handle their content -is the purpose of the Frame format, defined into [lz4_Frame_format]. -Interoperable versions of LZ4 must respect this frame format. +Arbitrarily long files or data streams are compressed using multiple blocks, +for streaming requirements. These blocks are organized into a frame, +defined into [lz4_Frame_format]. +Interoperable versions of LZ4 must also respect the frame format. [lz4_Block_format]: doc/lz4_Block_format.md [lz4_Frame_format]: doc/lz4_Frame_format.md diff --git a/circle.yml b/circle.yml deleted file mode 100644 index fa3759069..000000000 --- a/circle.yml +++ /dev/null @@ -1,38 +0,0 @@ -dependencies: - override: - - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update - - sudo apt-get -y install qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu - - sudo apt-get -y install qemu-system-arm gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross - - sudo apt-get -y install libc6-dev-i386 clang gcc-5 gcc-5-multilib gcc-6 valgrind - -test: - override: - # Tests compilers and C standards - - clang -v; make clangtest && make clean - - g++ -v; make gpptest && make clean - - gcc -v; make c_standards && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean - - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - gcc-6 -v; CC=gcc-6 make c_standards && make clean - - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check && make clean -# Shorter tests - - make cmake && make clean - - make -C tests test-lz4 - - make -C tests test-lz4c - - make -C tests test-frametest - - make -C tests test-fullbench - - make -C tests test-fuzzer && make clean - - make -C lib all && make clean - - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean - - make travis-install && make clean - # Longer tests - - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean - - make usan && make clean - - clang -v; make staticAnalyze && make clean - # Valgrind tests - - make -C tests test-mem && make clean - # ARM, AArch64, PowerPC, PowerPC64 tests - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make clean - - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64 && make clean - - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make clean - - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static && make clean diff --git a/contrib/cmake_unofficial/CMakeLists.txt b/contrib/cmake_unofficial/CMakeLists.txt index 27c3a7881..b09c4fb0e 100644 --- a/contrib/cmake_unofficial/CMakeLists.txt +++ 
b/contrib/cmake_unofficial/CMakeLists.txt @@ -12,6 +12,8 @@ set(LZ4_TOP_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") +option(LZ4_BUILD_LEGACY_LZ4C "Build lz4c progam with legacy argument support" ON) + # Parse version information file(STRINGS "${LZ4_TOP_SOURCE_DIR}/lib/lz4.h" LZ4_VERSION_MAJOR REGEX "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$") string(REGEX REPLACE "^#define LZ4_VERSION_MAJOR +([0-9]+) +.*$" "\\1" LZ4_VERSION_MAJOR "${LZ4_VERSION_MAJOR}") @@ -122,14 +124,18 @@ else() endif() # lz4 +set(LZ4_PROGRAMS_BUILT lz4cli) add_executable(lz4cli ${LZ4_CLI_SOURCES}) set_target_properties(lz4cli PROPERTIES OUTPUT_NAME lz4) target_link_libraries(lz4cli ${LZ4_LINK_LIBRARY}) # lz4c -add_executable(lz4c ${LZ4_CLI_SOURCES}) -set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") -target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +if (LZ4_BUILD_LEGACY_LZ4C) + list(APPEND LZ4_PROGRAMS_BUILT lz4c) + add_executable(lz4c ${LZ4_CLI_SOURCES}) + set_target_properties(lz4c PROPERTIES COMPILE_DEFINITIONS "ENABLE_LZ4C_LEGACY_OPTIONS") + target_link_libraries(lz4c ${LZ4_LINK_LIBRARY}) +endif() # Extra warning flags include (CheckCCompilerFlag) @@ -165,7 +171,7 @@ endforeach (flag) if(NOT LZ4_BUNDLED_MODE) include(GNUInstallDirs) - install(TARGETS lz4cli lz4c + install(TARGETS ${LZ4_PROGRAMS_BUILT} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS ${LZ4_LIBRARIES_BUILT} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index e5044fe70..6ebf8d281 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -1,10 +1,10 @@
-Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. + Then, provide this buffer as 'void* state' to the compression function.
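As an aside, a minimal sketch of how the extState contract documented above could be exercised; the payload, buffer sizes, and acceleration value are invented for illustration, and error handling is kept to the bare minimum:

```
/* Minimal sketch : one-shot compression with an externally allocated state.
 * The input string and acceleration value are illustrative only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    const char src[] = "externally allocated state example, externally allocated state example";
    int const srcSize = (int)strlen(src);
    int const dstCapacity = LZ4_compressBound(srcSize);
    char* const dst = (char*)malloc((size_t)dstCapacity);
    void* const state = malloc((size_t)LZ4_sizeofState());   /* malloc() returns suitably aligned memory */
    if (dst == NULL || state == NULL) return 1;

    {   int const cSize = LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
        if (cSize <= 0) { printf("compression failed\n"); return 1; }
        printf("compressed %i bytes into %i bytes\n", srcSize, cSize);
    }
    free(state);
    free(dst);
    return 0;
}
```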
int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); -Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. - This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +
Reverse the logic : compresses as much data as possible from 'src' buffer + into already allocated buffer 'dst', of size >= 'targetDestSize'. + This function either compresses the entire 'src' content into 'dst' if it's large enough, + or fills 'dst' buffer completely with as much data as possible from 'src'. + note: acceleration parameter is fixed to "default". + + *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'. + New value is necessarily <= input value. + @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + or 0 if compression fails.
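For illustration, a small sketch of the destSize usage described above; the 4 KB input and the 256-byte output budget are arbitrary choices, not values taken from this patch:

```
/* Minimal sketch : fill a fixed-size output buffer with as much compressed
 * data as fits, using LZ4_compress_destSize(). Sizes are illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    char src[4096];
    char dst[256];                      /* hard budget for compressed output */
    int srcSize = (int)sizeof(src);     /* in : bytes available ; out : bytes consumed */
    memset(src, 'A', sizeof(src));      /* highly compressible payload */

    {   int const cSize = LZ4_compress_destSize(src, dst, &srcSize, (int)sizeof(dst));
        if (cSize == 0) { printf("compression failed\n"); return 1; }
        printf("consumed %i bytes of input, wrote %i bytes (<= %i)\n",
               srcSize, cSize, (int)sizeof(dst));
    }
    return 0;
}
```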
int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. - return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. +
This function used to be a bit faster than LZ4_decompress_safe(), + though the situation has changed in recent versions, + and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + As a consequence, this function is no longer recommended, and may be deprecated in future versions. + Its only remaining specificity is that it can decompress data without knowing its compressed size. + + originalSize : is the uncompressed size to regenerate. + `dst` must be already allocated, its size must be >= 'originalSize' bytes. + @return : number of bytes read from source buffer (== compressed size). + If the source stream is detected malformed, the function stops decoding and returns a negative result. + note : This function requires uncompressed originalSize to be known in advance. + The function never writes past the output buffer. + However, since it doesn't know its 'src' size, it may read past the intended input. + Also, because match offsets are not validated during decoding, + reads from 'src' may underflow. + Use this function in trusted environment **only**. +
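Since the note above steers users away from LZ4_decompress_fast(), here is a minimal round-trip sketch built on the recommended LZ4_decompress_safe(); the sample string and buffer sizes are illustrative only:

```
/* Minimal sketch : round-trip using LZ4_decompress_safe(), the recommended
 * decoder when the compressed size is known. Buffers are illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    const char src[] = "prefer LZ4_decompress_safe, prefer LZ4_decompress_safe";
    int const srcSize = (int)sizeof(src);          /* include terminating null for simplicity */
    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];
    char regenerated[sizeof(src)];
    int const cSize = LZ4_compress_default(src, compressed, srcSize, (int)sizeof(compressed));
    if (cSize <= 0) return 1;

    /* dSize is the number of regenerated bytes, or a negative value on malformed input */
    {   int const dSize = LZ4_decompress_safe(compressed, regenerated, cSize, (int)sizeof(regenerated));
        if (dSize < 0 || dSize != srcSize) { printf("decompression error\n"); return 1; }
        printf("round-trip OK : %s\n", regenerated);
    }
    return 0;
}
```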
int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); -This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. +
Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + into destination buffer 'dst' of size 'dstCapacity'. + Up to 'targetOutputSize' bytes will be decoded. + The function stops decoding on reaching this objective, + which can boost performance when only the beginning of a block is required. + + @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + If source stream is detected malformed, function returns a negative result. + + Note : @return can be < targetOutputSize, if compressed block contains less data. + + Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + and expects targetOutputSize <= dstCapacity. + It effectively stops decoding on reaching targetOutputSize, + so dstCapacity is kind of redundant. + This is because in a previous version of this function, + decoding operation would not "break" a sequence in the middle. + As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + it could write more bytes, though only up to dstCapacity. + Some "margin" used to be required for this operation to work properly. + This is no longer necessary. + The function nonetheless keeps its signature, in an effort to not break API. +
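A short sketch of the partial-decoding behaviour documented above, assuming an arbitrary 1 KB payload and a 64-byte target; with this patch (v1.8.3), the call is expected to decode exactly the requested number of bytes:

```
/* Minimal sketch : decode only the first bytes of a block with
 * LZ4_decompress_safe_partial(). The 64-byte target is illustrative. */
#include <stdio.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
    char src[1024];
    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];
    char partial[64];                              /* only the beginning is needed */
    memset(src, 'B', sizeof(src));

    {   int const cSize = LZ4_compress_default(src, compressed, (int)sizeof(src), (int)sizeof(compressed));
        if (cSize <= 0) return 1;
        /* request at most sizeof(partial) bytes; targetOutputSize <= dstCapacity as required */
        {   int const dSize = LZ4_decompress_safe_partial(compressed, partial, cSize,
                                                          (int)sizeof(partial), (int)sizeof(partial));
            if (dSize < 0) { printf("malformed input\n"); return 1; }
            printf("decoded %i bytes out of a %i-byte block\n", dSize, (int)sizeof(src));
    }   }
    return 0;
}
```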
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);An LZ4_streamDecode_t context can be allocated once and re-used multiple times. Use this function to start decompression of a new stream of blocks. - A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + A dictionary can optionally be set. Use NULL or size 0 for a reset order. Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. @return : 1 if OK, 0 if error diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index 53ea7eb19..fb8e0ceb3 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -1,10 +1,10 @@
-1.8.2 Manual +1.8.3 Manual -1.8.2 Manual
+1.8.3 Manual
Contents
diff --git a/examples/frameCompress.c b/examples/frameCompress.c index 9bfea483f..a0c5d3d80 100644 --- a/examples/frameCompress.c +++ b/examples/frameCompress.c @@ -31,12 +31,13 @@ static const LZ4F_preferences_t kPrefs = { static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f) { size_t const writtenSize = fwrite(buf, eltSize, nbElt, f); - size_t const expectedSize = eltSize * nbElt; /* note : should check for overflow */ + size_t const expectedSize = eltSize * nbElt; + assert(expectedSize / nbElt == eltSize); /* check overflow */ if (writtenSize < expectedSize) { if (ferror(f)) /* note : ferror() must follow fwrite */ - printf("Write failed\n"); + fprintf(stderr, "Write failed \n"); else - printf("Short write\n"); + fprintf(stderr, "Short write \n"); exit(1); } } @@ -54,9 +55,9 @@ typedef struct { static compressResult_t compress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_compressionContext_t ctx, - void* inBuff, size_t inChunkSize, - void* outBuff, size_t outCapacity) + LZ4F_compressionContext_t ctx, + void* inBuff, size_t inChunkSize, + void* outBuff, size_t outCapacity) { compressResult_t result = { 1, 0, 0 }; /* result for an error */ unsigned long long count_in = 0, count_out; @@ -167,9 +168,9 @@ static size_t get_block_size(const LZ4F_frameInfo_t* info) { /* @return : 1==error, 0==success */ static int decompress_file_internal(FILE* f_in, FILE* f_out, - LZ4F_dctx* dctx, - void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, - void* dst, size_t dstCapacity) + LZ4F_dctx* dctx, + void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, + void* dst, size_t dstCapacity) { int firstChunk = 1; size_t ret = 1; @@ -194,7 +195,7 @@ decompress_file_internal(FILE* f_in, FILE* f_out, * Continue while there is more input to read (srcPtr != srcEnd) * and the frame isn't over (ret != 0) */ - while (srcPtr != srcEnd && ret != 0) { + while (srcPtr < srcEnd && ret != 0) { /* Any data within dst has been flushed at this stage */ size_t dstSize = dstCapacity; size_t srcSize = srcEnd - srcPtr; @@ -208,9 +209,20 @@ decompress_file_internal(FILE* f_in, FILE* f_out, /* Update input */ srcPtr += srcSize; } + + assert(srcPtr <= srcEnd); + + /* Ensure all input data has been consumed. + * It is valid to have multiple frames in the same file, + * but this example only supports one frame. + */ + if (srcPtr < srcEnd) { + printf("Decompress: Trailing data left in file after frame\n"); + return 1; + } } - /* Check that there isn't trailing input data after the frame. + /* Check that there isn't trailing data in the file after the frame. * It is valid to have multiple frames in the same file, * but this example only supports one frame. 
*/ @@ -260,7 +272,7 @@ decompress_file_allocDst(FILE* f_in, FILE* f_out, int const decompressionResult = decompress_file_internal( f_in, f_out, dctx, - src, srcCapacity, readSize, consumedSize, + src, srcCapacity, readSize-consumedSize, consumedSize, dst, dstCapacity); free(dst); @@ -278,7 +290,7 @@ static int decompress_file(FILE* f_in, FILE* f_out) if (!src) { perror("decompress_file(src)"); return 1; } LZ4F_dctx* dctx; - { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, 100); + { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); if (LZ4F_isError(dctxStatus)) { printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus)); } } diff --git a/lib/Makefile b/lib/Makefile index abb6c075a..88d9b4f28 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,7 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT)) BUILD_SHARED:=yes BUILD_STATIC:=yes +OS ?= $(shell uname) CPPFLAGS+= -DXXH_NAMESPACE=LZ4_ CFLAGS ?= -O3 DEBUGFLAGS:= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ @@ -58,7 +59,7 @@ SRCFILES := $(sort $(wildcard *.c)) # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -ifeq ($(shell uname), Darwin) +ifeq ($(OS), Darwin) SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) @@ -123,7 +124,7 @@ clean: #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) .PHONY: listL120 listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) @@ -142,14 +143,14 @@ libdir ?= $(LIBDIR) INCLUDEDIR ?= $(prefix)/include includedir ?= $(INCLUDEDIR) -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly)) +ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly)) PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig else PKGCONFIGDIR ?= $(libdir)/pkgconfig endif pkgconfigdir ?= $(PKGCONFIGDIR) -ifneq (,$(filter $(shell uname),SunOS)) +ifneq (,$(filter $(OS),SunOS)) INSTALL ?= ginstall else INSTALL ?= install diff --git a/lib/lz4.c b/lib/lz4.c index e51a3e0a4..4046102e6 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1,6 +1,6 @@ /* LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2017, Yann Collet. + Copyright (C) 2011-present, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -297,8 +297,9 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) #define MINMATCH 4 #define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) @@ -483,9 +484,6 @@ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - /*-************************************ * Local Utils @@ -496,6 +494,21 @@ int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } int LZ4_sizeofState() { return LZ4_STREAMSIZE; } +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + /*-****************************** * Compression functions ********************************/ @@ -669,9 +682,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = dictDirective == usingDictCtx ? - dictionary + dictSize - dictCtx->currentOffset : - dictionary + dictSize - startIndex; + const BYTE* dictBase = (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; @@ -699,7 +712,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( cctx->dictSize += (U32)inputSize; } cctx->currentOffset += (U32)inputSize; - cctx->tableType = tableType; + cctx->tableType = (U16)tableType; if (inputSize
oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ + assert(lowPrefix <= op); + assert(src != NULL); if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1); if ((endOnInput) && unlikely(srcSize==0)) return -1; /* Main Loop : decode sequences */ @@ -1428,7 +1447,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( size_t offset; unsigned const token = *ip++; - size_t length = token >> ML_BITS; /* literal length */ + size_t length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ @@ -1453,6 +1472,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( length = token & ML_MASK; /* match length */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; + assert(match <= op); /* check overflow */ /* Do not deal with overlapping matches. */ if ( (length != ML_MASK) @@ -1486,11 +1506,12 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op+length; - if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { if (partialDecoding) { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if (cpy > oend) { cpy = oend; length = oend-op; } /* Partial decoding : stop in the middle of literal segment */ if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ } else { if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ @@ -1499,10 +1520,15 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; - break; /* Necessarily EOF, due to parsing restrictions */ + if (!partialDecoding || (cpy == oend)) { + /* Necessarily EOF, due to parsing restrictions */ + break; + } + + } else { + LZ4_wildCopy(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; /* get offset */ offset = LZ4_readLE16(ip); ip+=2; @@ -1513,7 +1539,11 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( _copy_match: if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + LZ4_write32(op, 0); /* silence an msan warning when offset==0; costs <1%; */ + } /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */ if (length == ML_MASK) { unsigned s; @@ -1526,21 +1556,24 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( } length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, 
(size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } if (length <= (size_t)(lowPrefix-match)) { - /* match can be copied as a single segment from external dictionary */ + /* match fits entirely within external dictionary : just copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { - /* match encompass external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix-match); + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */ + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; @@ -1553,6 +1586,23 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* copy match within block */ cpy = op + length; + + /* partialDecoding : may not respect endBlock parsing restrictions */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) *op++ = *match++; + } else { + memcpy(op, match, mlen); + } + op = copyEnd; + if (op==oend) break; + continue; + } + if (unlikely(offset<8)) { op[0] = match[0]; op[1] = match[1]; @@ -1561,23 +1611,26 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( match += inc32table[offset]; memcpy(op+4, match, 4); match -= dec64table[offset]; - } else { memcpy(op, match, 8); match+=8; } + } else { + memcpy(op, match, 8); + match += 8; + } op += 8; - if (unlikely(cpy>oend-12)) { - BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1); + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { LZ4_wildCopy(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } - while (op 16) LZ4_wildCopy(op+8, match+8, cpy); + if (length > 16) LZ4_wildCopy(op+8, match+8, cpy); } - op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ @@ -1598,23 +1651,24 @@ LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE -int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - endOnInputSize, partial, targetOutputSize, - noDict, (BYTE*)dest, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return 
LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, + endOnOutputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1624,7 +1678,7 @@ LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, withPrefix64k, + endOnInputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } @@ -1641,17 +1695,17 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i size_t prefixSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, noDict, + endOnInputSize, decode_full_block, noDict, (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */ -#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict -int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize) +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1660,7 +1714,7 @@ static int LZ4_decompress_fast_extDict(const char* source, char* dest, int origi const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } @@ -1673,7 +1727,7 @@ int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compresse size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, usingExtDict, + endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } @@ -1682,7 +1736,7 @@ int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalS size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, usingExtDict, + endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } @@ -1773,8 +1827,8 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch /* The buffer wraps around, or they're switching to another buffer. 
*/ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; lz4sd->prefixEnd = (BYTE*)dest + result; @@ -1834,7 +1888,7 @@ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressed return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) diff --git a/lib/lz4.h b/lib/lz4.h index 7d1312219..059ef7c1b 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -1,7 +1,7 @@ /* * LZ4 - Fast LZ compression algorithm * Header File - * Copyright (C) 2011-2017, Yann Collet. + * Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -46,7 +46,7 @@ extern "C" { /** Introduction - LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core, + LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core, scalable with multi-cores CPU. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. @@ -62,8 +62,8 @@ extern "C" { An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md), take care of encoding standard metadata alongside LZ4-compressed blocks. - If your application requires interoperability, it's recommended to use it. - A library is provided to take care of it, see lz4frame.h. + Frame format is required for interoperability. + It is delivered through a companion API, declared in lz4frame.h. */ /*^*************************************************************** @@ -93,7 +93,7 @@ extern "C" { /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 8 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) @@ -183,55 +183,72 @@ LZ4_compress_fast_extState() : Same compression function, just using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. + Then, provide this buffer as 'void* state' to compression function. */ LZ4LIB_API int LZ4_sizeofState(void); LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); -/*! -LZ4_compress_destSize() : - Reverse the logic : compresses as much data as possible from 'src' buffer - into already allocated buffer 'dst' of size 'targetDestSize'. 
- This function either compresses the entire 'src' content into 'dst' if it's large enough, - or fill 'dst' buffer completely with as much data as possible from 'src'. - *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. - New value is necessarily <= old value. - return : Nb bytes written into 'dst' (necessarily <= targetDestSize) - or 0 if compression fails +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. */ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); -/*! -LZ4_decompress_fast() : **unsafe!** -This function is a bit faster than LZ4_decompress_safe(), -but it may misbehave on malformed input because it doesn't perform full validation of compressed data. - originalSize : is the uncompressed size to regenerate - Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes. - return : number of bytes read from source buffer (== compressed size). - If the source stream is detected malformed, the function stops decoding and return a negative result. - note : This function is only usable if the originalSize of uncompressed data is known in advance. - The caller should also check that all the compressed input has been consumed properly, - i.e. that the return value matches the size of the buffer with compressed input. - The function never writes past the output buffer. However, since it doesn't know its 'src' size, - it may read past the intended input. Also, because match offsets are not validated during decoding, - reads from 'src' may underflow. Use this function in trusted environment **only**. -*/ +/*! LZ4_decompress_fast() : **unsafe!** + * This function used to be a bit faster than LZ4_decompress_safe(), + * though situation has changed in recent versions, + * and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`. + * Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data. + * As a consequence, this function is no longer recommended, and may be deprecated in future versions. + * It's only remaining specificity is that it can decompress data without knowing its compressed size. + * + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : This function requires uncompressed originalSize to be known in advance. + * The function never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read past the intended input. + * Also, because match offsets are not validated during decoding, + * reads from 'src' may underflow. 
+ * Use this function in trusted environment **only**. + */ LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -/*! -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'srcSize' at position 'src' - into destination buffer 'dst' of size 'dstCapacity'. - The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that. - However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity). - @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity) - Note : this number can also be < targetOutputSize, if compressed block contains less data. - Therefore, always control how many bytes were decoded. - If source stream is detected malformed, function returns a negative result. - This function is protected against malicious data packets. -*/ +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective, + * which can boost performance when only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + * If source stream is detected malformed, function returns a negative result. + * + * Note : @return can be < targetOutputSize, if compressed block contains less data. + * + * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, + * and expects targetOutputSize <= dstCapacity. + * It effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in a previous version of this function, + * decoding operation would not "break" a sequence in the middle. + * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * This is no longer necessary. + * The function nonetheless keeps its signature, in an effort to not break API. + */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); @@ -266,16 +283,23 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. * - * Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory! - * - * Special 1 : When input is a double-buffer, they can have any size, including < 64 KB. - * Make sure that buffers are separated by at least one byte. - * This way, each block only depends on previous block. - * Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. - * * @return : size of compressed block * or 0 if there is an error (typically, cannot fit into 'dst'). - * After an error, the stream status is invalid, it can only be reset or freed. + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. 
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is invalid, it can only be reset or freed. */ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); @@ -305,7 +329,7 @@ LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str /*! LZ4_setStreamDecode() : * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. - * A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ diff --git a/lib/lz4frame.c b/lib/lz4frame.c index e1d0b1d02..08bf0faee 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -738,7 +738,7 @@ static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize, static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? -level + 1 : 1; LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent); if (cdict) { return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); @@ -749,7 +749,7 @@ static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { - int const acceleration = (level < -1) ? -level : 1; + int const acceleration = (level < 0) ? -level + 1 : 1; (void)cdict; /* init once at beginning of frame */ return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } diff --git a/lib/lz4frame.h b/lib/lz4frame.h index fb434ff76..75f1fd91b 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -33,9 +33,10 @@ */ /* LZ4F is a stand-alone API to create LZ4-compressed frames - * conformant with specification v1.5.1. + * conformant with specification v1.6.1. * It also offers streaming capabilities. - * lz4.h is not required when using lz4frame.h. + * lz4.h is not required when using lz4frame.h, + * except to get constant such as LZ4_VERSION_NUMBER. * */ #ifndef LZ4F_H_09782039843 @@ -159,8 +160,9 @@ typedef LZ4F_contentChecksum_t contentChecksum_t; /*! LZ4F_frameInfo_t : * makes it possible to set or read frame parameters. - * It's not required to set all fields, as long as the structure was initially memset() to zero. - * For all fields, 0 sets it to default value */ + * Structure must be first init to 0, using memset() or LZ4F_INIT_FRAMEINFO, + * setting all parameters to default. 
+ * It's then possible to update selectively some parameters */ typedef struct { LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB; 0 == default */ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */ @@ -171,24 +173,30 @@ typedef struct { LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */ } LZ4F_frameInfo_t; +#define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } /* v1.8.3+ */ + /*! LZ4F_preferences_t : - * makes it possible to supply detailed compression parameters to the stream interface. - * Structure is presumed initially memset() to zero, representing default settings. + * makes it possible to supply advanced compression instructions to streaming interface. + * Structure must be first init to 0, using memset() or LZ4F_INIT_PREFERENCES, + * setting all parameters to default. * All reserved fields must be set to zero. */ typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */ - unsigned autoFlush; /* 1: always flush, to reduce usage of internal buffers */ - unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */ /* >= v1.8.2 */ + unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */ + unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */ unsigned reserved[3]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; -LZ4FLIB_API int LZ4F_compressionLevel_max(void); +#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } /* v1.8.3+ */ /*-********************************* * Simple compression function ***********************************/ + +LZ4FLIB_API int LZ4F_compressionLevel_max(void); + /*! LZ4F_compressFrameBound() : * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences. * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences. @@ -222,8 +230,9 @@ typedef struct { /*--- Resource Management ---*/ -#define LZ4F_VERSION 100 +#define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */ LZ4FLIB_API unsigned LZ4F_getVersion(void); + /*! LZ4F_createCompressionContext() : * The first thing to do is to create a compressionContext object, which will be used in all compression operations. * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version. 
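To show the new LZ4F_INIT_PREFERENCES initializer in context, a minimal one-shot frame-compression sketch; the payload size is illustrative, and the one-shot LZ4F_compressFrame() path is used so no compression context is needed:

```
/* Minimal sketch : one-shot frame compression, initializing preferences with
 * the LZ4F_INIT_PREFERENCES macro added by this change (v1.8.3+). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4frame.h"

int main(void)
{
    char src[16 * 1024];
    memset(src, 'C', sizeof(src));

    {   LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;   /* every field at its default */
        size_t const dstCapacity = LZ4F_compressFrameBound(sizeof(src), &prefs);
        void* const dst = malloc(dstCapacity);
        if (dst == NULL) return 1;
        {   size_t const cSize = LZ4F_compressFrame(dst, dstCapacity, src, sizeof(src), &prefs);
            if (LZ4F_isError(cSize)) {
                printf("frame compression error : %s\n", LZ4F_getErrorName(cSize));
                free(dst);
                return 1;
            }
            printf("compressed %u bytes into %u bytes\n",
                   (unsigned)sizeof(src), (unsigned)cSize);
        }
        free(dst);
    }
    return 0;
}
```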
diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 8108ea011..e913ee7b3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -327,6 +327,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { + assert(base + matchIndex < ip); + if (ip - (base+matchIndex) > MAX_DISTANCE) break; assert(maxML < 2 GB); longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ @@ -450,6 +452,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( *op += length; /* Encode Offset */ + assert( (*ip - match) <= MAX_DISTANCE ); /* note : consider providing offset as a value, rather than as a pointer difference */ LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ diff --git a/lib/lz4hc.h b/lib/lz4hc.h index bb5e07373..970fa3966 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -246,6 +246,10 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API int LZ4_resetStr #ifndef LZ4_HC_SLO_098092834 #define LZ4_HC_SLO_098092834 +#if defined (__cplusplus) +extern "C" { +#endif + /*! LZ4_compress_HC_destSize() : v1.8.0 (experimental) * Will try to compress as much data from `src` as possible * that can fit into `targetDstSize` budget. @@ -343,5 +347,9 @@ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* ds */ LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream); +#if defined (__cplusplus) +} +#endif + #endif /* LZ4_HC_SLO_098092834 */ #endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/programs/Makefile b/programs/Makefile index 72bdcaac1..bd33d9be0 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -82,7 +82,7 @@ lz4-release: DEBUGFLAGS= lz4-release: lz4 lz4c: lz4 - ln -s lz4 lz4c + ln -s lz4$(EXT) lz4c$(EXT) lz4c32: CFLAGS += -m32 lz4c32 : $(SRCFILES) @@ -102,20 +102,20 @@ preview-man: clean-man man clean: @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) @$(RM) core *.o *.test tmp* \ - lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4 lz4cat + lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) unlz4$(EXT) lz4cat$(EXT) @echo Cleaning completed #----------------------------------------------------------------------------- # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) unlz4: lz4 - ln -s lz4 unlz4 + ln -s lz4$(EXT) unlz4$(EXT) lz4cat: lz4 - ln -s lz4 lz4cat + ln -s lz4$(EXT) lz4cat$(EXT) DESTDIR ?= # directory variables : GNU conventions prefer lowercase @@ -147,10 +147,10 @@ INSTALL_DATA ?= $(INSTALL) -m 644 install: lz4 @echo Installing binaries @$(INSTALL) -d -m 755 $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/ - @$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(bindir)/lz4 - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4c - @ln -sf lz4 $(DESTDIR)$(bindir)/lz4cat - @ln -sf lz4 $(DESTDIR)$(bindir)/unlz4 + @$(INSTALL_PROGRAM) lz4$(EXT) $(DESTDIR)$(bindir)/lz4$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4c$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(bindir)/unlz4$(EXT) @echo Installing man pages @$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1 @ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4c.1 @@ -159,10 +159,10 @@ install: lz4 @echo lz4 
installation completed uninstall: - @$(RM) $(DESTDIR)$(bindir)/lz4cat - @$(RM) $(DESTDIR)$(bindir)/unlz4 - @$(RM) $(DESTDIR)$(bindir)/lz4 - @$(RM) $(DESTDIR)$(bindir)/lz4c + @$(RM) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @$(RM) $(DESTDIR)$(bindir)/unlz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4c$(EXT) @$(RM) $(DESTDIR)$(man1dir)/lz4.1 @$(RM) $(DESTDIR)$(man1dir)/lz4c.1 @$(RM) $(DESTDIR)$(man1dir)/lz4cat.1 diff --git a/programs/bench.c b/programs/bench.c index 770191cfa..11bf0440c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -49,7 +49,10 @@ #include "lz4.h" #define COMPRESSOR0 LZ4_compress_local -static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); } +static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { + int const acceleration = (clevel < 0) ? -clevel + 1 : 1; + return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); +} #include "lz4hc.h" #define COMPRESSOR1 LZ4_compress_HC #define DEFAULTCOMPRESSOR COMPRESSOR0 @@ -326,7 +329,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); if (crcOrig!=crcCheck) { size_t u; - DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u %.2f%%\n", - filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100); /* avoid division by zero */ + filesize, compressedfilesize, + (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100); return 0; } @@ -645,21 +652,25 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel) { - clock_t const start = clock(); + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); cRess_t const ress = LZ4IO_createCResources(); - int const issueWithSrcFile = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); + int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel); /* Free resources */ LZ4IO_freeCResources(ress); /* Final Status */ - { clock_t const end = clock(); - double const seconds = (double)(end - start) / CLOCKS_PER_SEC; - DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = (double)timeLength_ns / 1000000000; + DISPLAYLEVEL(4, "Completed in %.2f sec (cpu load : %.0f%%)\n", + timeLength_s, (cpuLoad_s / timeLength_s) * 100); } - return issueWithSrcFile; + return result; } diff --git a/programs/platform.h b/programs/platform.h index db2efac88..c0b384020 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -81,7 +81,7 @@ extern "C" { #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ || defined(__midipix__) || defined(__VMS)) # if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \ - || defined(__DragonFly__) || 
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MidnightBSD__) /* BSD distros */ # define PLATFORM_POSIX_VERSION 200112L # else # if defined(__linux__) || defined(__linux) diff --git a/tests/.gitignore b/tests/.gitignore index 36dff4207..9aa42a064 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,5 +1,5 @@ -# test build artefacts +# build artefacts datagen frametest frametest32 @@ -8,8 +8,12 @@ fullbench32 fuzzer fuzzer32 fasttest +roundTripTest checkTag # test artefacts tmp* versionsTest + +# local tests +afl diff --git a/tests/Makefile b/tests/Makefile index d2385614a..3de111b8d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -63,7 +63,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest datagen +all: fullbench fuzzer frametest roundTripTest datagen all32: CFLAGS+=-m32 all32: all @@ -103,6 +103,9 @@ fuzzer : lz4.o lz4hc.o xxhash.o fuzzer.c frametest: lz4frame.o lz4.o lz4hc.o xxhash.o frametest.c $(CC) $(FLAGS) $^ -o $@$(EXT) +roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) @@ -114,7 +117,8 @@ clean: fullbench$(EXT) fullbench32$(EXT) \ fuzzer$(EXT) fuzzer32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ - fasttest$(EXT) datagen$(EXT) checkTag$(EXT) + fasttest$(EXT) roundTripTest$(EXT) \ + datagen$(EXT) checkTag$(EXT) @rm -fR $(TESTDIR) @echo Cleaning completed @@ -129,7 +133,7 @@ checkTag: checkTag.c $(LZ4DIR)/lz4.h #----------------------------------------------------------------------------- # validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD)) MD5:=md5sum ifneq (,$(filter $(shell uname), Darwin )) @@ -262,8 +266,17 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat cat tmp-tlb-hw >> tmp-tlb-hw.lz4 $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum + # ./datagen -g20KB generates the same file every single time + # cannot save output of ./datagen -g20KB as input file to lz4 because the following shell commands are run before ./datagen -g20KB + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell ./datagen -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9 + test "$(shell ./datagen -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell ./datagen -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1 + test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 + ! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0 + ! 
$(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1 @$(RM) tmp-tlb* + + test-lz4-dict: lz4 datagen @echo "\n ---- test lz4 compression/decompression with dictionary ----" ./datagen -g16KB > tmp-dict diff --git a/tests/fullbench.c b/tests/fullbench.c index c06e2301d..fd1202df1 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -220,8 +220,16 @@ static int local_LZ4_compress_fast_continue0(const char* in, char* out, int inSi } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + /* declare hidden function */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); +extern int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +#if defined (__cplusplus) +} +#endif static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize) { @@ -289,8 +297,16 @@ static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int in } #ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); +#if defined (__cplusplus) +} +#endif + static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize) { (void)inSize; @@ -301,7 +317,9 @@ static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) { - return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + int result = LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + if (result < 0) return result; + return outSize; } @@ -446,9 +464,9 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break; case 14: compressionFunction = local_LZ4_compress_HC_continue; initFunction = local_LZ4_resetStreamHC; compressorName = "LZ4_compress_HC_continue"; break; #ifndef LZ4_DLL_IMPORT - case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; + case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; #endif - case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; + case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; chunkP[0].origSize = (int)benchedSize; nbChunks=1; break; case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; @@ -526,6 +544,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) const char* dName; int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; + int checkResult = 1; if ((g_decompressionAlgo != ALL_DECOMPRESSORS) && (g_decompressionAlgo != dAlgNb)) continue; @@ -537,11 +556,11 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break; case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break; case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break; - case 7: decompressionFunction = 
local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break; + case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; checkResult = 0; break; #ifndef LZ4_DLL_IMPORT - case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; + case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; #endif - case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; + case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL); if (LZ4F_isError(errorCode)) { DISPLAY("Error while preparing compressed frame\n"); @@ -573,9 +592,13 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) clockTime = clock(); while(BMK_GetClockSpan(clockTime) < TIMELOOP) { for (chunkNb=0; chunkNb %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 5dd75b389..b29e82e4e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -38,7 +38,7 @@ /*-************************************ * Dependencies **************************************/ -#ifdef __unix__ /* must be included before platform.h for MAP_ANONYMOUS */ +#if defined(__unix__) && !defined(_AIX) /* must be included before platform.h for MAP_ANONYMOUS */ # include /* mmap */ #endif #include "platform.h" /* _CRT_SECURE_NO_WARNINGS */ @@ -48,6 +48,10 @@ #include /* strcmp */ #include /* clock_t, clock, CLOCKS_PER_SEC */ #include +#if defined(__unix__) && defined(_AIX) +# include /* mmap */ +#endif + #define LZ4_STATIC_LINKING_ONLY #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" @@ -319,12 +323,17 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c int result = 0; unsigned cycleNb; -# define FUZ_CHECKTEST(cond, ...) if (cond) { printf("Test %u : ", testNb); printf(__VA_ARGS__); \ - printf(" (seed %u, cycle %u) \n", seed, cycleNb); goto _output_error; } +# define FUZ_CHECKTEST(cond, ...) \ + if (cond) { \ + printf("Test %u : ", testNb); printf(__VA_ARGS__); \ + printf(" (seed %u, cycle %u) \n", seed, cycleNb); \ + goto _output_error; \ + } + # define FUZ_DISPLAYTEST(...) { \ testNb++; \ if (g_displayLevel>=4) { \ - printf("\r%4u - %2u ", cycleNb, testNb); \ + printf("\r%4u - %2u :", cycleNb, testNb); \ printf(" " __VA_ARGS__); \ printf(" "); \ fflush(stdout); \ @@ -495,7 +504,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c /* Test decoding with empty input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input"); - LZ4_decompress_safe(NULL, decodedBuffer, 0, blockSize); + LZ4_decompress_safe(compressedBuffer, decodedBuffer, 0, blockSize); /* Test decoding with a one byte input */ FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input"); @@ -536,7 +545,6 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space"); FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data"); - //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than (unknown) target size"); // well, is that an issue ? 
FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size"); { U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0); FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); @@ -570,15 +578,16 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); - // Test partial decoding with target output size being max/2 => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); - - // Test partial decoding with target output size being just below max => must work - FUZ_DISPLAYTEST(); - ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); + /* Test partial decoding => must work */ + FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); + { size_t const missingBytes = FUZ_rand(&randState) % blockSize; + int const targetSize = (int)(blockSize - missingBytes); + char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; + int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize); + FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); + FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); + } /* Test Compression with limited output size */ @@ -801,7 +810,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)"); FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer"); { U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2; if ((U32)blockSize > missingBytes) { decodedBuffer[blockSize-missingBytes] = 0; @@ -811,7 +820,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c } } /* Compress HC using External dictionary */ - FUZ_DISPLAYTEST(); + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary"); dict -= (FUZ_rand(&randState) & 7); /* even bigger separation */ if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; LZ4_resetStreamHC (&LZ4dictHC, compressionLevel); diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c new file mode 100644 index 000000000..2d344518e --- /dev/null +++ b/tests/roundTripTest.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This program takes a file in input, + * performs an LZ4 round-trip test (compress + decompress) + * compares the result with original + * and generates an abort() on corruption detection, + * in order for afl to register the event as a crash. +*/ + + +/*=========================================== +* Tuning Constant +*==========================================*/ +#ifndef MIN_CLEVEL +# define MIN_CLEVEL (int)(-5) +#endif + + + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include /* strcmp */ +#include +#include /* stat */ +#include /* stat */ +#include "xxhash.h" + +#include "lz4.h" +#include "lz4hc.h" + + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MSG(...) fprintf(stderr, __VA_ARGS__) + +#define CONTROL_MSG(c, ...) { \ + if ((c)) { \ + MSG(__VA_ARGS__); \ + MSG(" \n"); \ + abort(); \ + } \ +} + + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* const ip1 = (const char*)buff1; + const char* const ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos = LZ4_compressBound(srcSize)` + * for compression to be guaranteed to work */ +static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcSize, + int clevel) +{ + int const proposed_clevel = clevel ? clevel : select_clevel(srcBuff, srcSize); + int const selected_clevel = proposed_clevel < 0 ? -proposed_clevel : proposed_clevel; /* if level < 0, it becomes an accelearion value */ + compressFn compress = selected_clevel >= LZ4HC_CLEVEL_MIN ? LZ4_compress_HC : LZ4_compress_fast; + int const cSize = compress((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, selected_clevel); + CONTROL_MSG(cSize == 0, "Compression error !"); + + { int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity); + CONTROL_MSG(dSize < 0, "Decompression detected an error !"); + CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !"); + } + + /* check potential content corruption error */ + assert(resultBuffCapacity >= srcSize); + { size_t const errorPos = checkBuffers(srcBuff, resultBuff, srcSize); + CONTROL_MSG(errorPos != srcSize, + "Silent decoding corruption, at pos %u !!!", + (unsigned)errorPos); + } + +} + +static void roundTripCheck(const void* srcBuff, size_t srcSize, int clevel) +{ + size_t const cBuffSize = LZ4_compressBound((int)srcSize); + void* const cBuff = malloc(cBuffSize); + void* const rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + exit(1); + } + + roundTripTest(rBuff, cBuffSize, + cBuff, cBuffSize, + srcBuff, srcSize, + clevel); + + free(rBuff); + free(cBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : + * requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + MSG("Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + MSG("Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + MSG("Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName, int clevel) +{ + size_t const fileSize = getFileSize(fileName); + void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); + if (!buffer) { + MSG("not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize, clevel); + free (buffer); +} + + +int bad_usage(const char* exeName) +{ + MSG(" \n"); + MSG("bad usage: \n"); + MSG(" \n"); + MSG("%s [Options] fileName \n", exeName); + MSG(" \n"); + MSG("Options: \n"); + MSG("-# : use #=[0-9] compression level (default:0 == random) \n"); + return 1; +} + + +int main(int argCount, const char** argv) +{ + const char* const exeName = argv[0]; + int argNb = 1; + int clevel = 0; + + assert(argCount >= 1); + if (argCount < 2) return bad_usage(exeName); + + if (argv[1][0] == '-') { + clevel = argv[1][1] - '0'; + argNb = 2; + } + + if (argNb >= argCount) return bad_usage(exeName); + + fileCheck(argv[argNb], clevel); + MSG("no pb detected \n"); + return 0; +}
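programs/bench.c above now maps negative benchmark levels onto LZ4's acceleration parameter, and tests/roundTripTest.c dispatches between LZ4_compress_fast() and LZ4_compress_HC() depending on the selected level. A compact, self-contained sketch combining both ideas (the helper name is illustrative, not part of the library API):

    #include "lz4.h"
    #include "lz4hc.h"

    /* Level dispatch :
     *  - level >= LZ4HC_CLEVEL_MIN     : high-compression path
     *  - 0 <= level < LZ4HC_CLEVEL_MIN : fast compressor, default acceleration
     *  - level < 0                     : fast compressor, acceleration == -level + 1
     *    (same mapping as LZ4_compress_local() in programs/bench.c) */
    int compressWithLevel(const char* src, char* dst,
                          int srcSize, int dstCapacity, int level)
    {
        if (level >= LZ4HC_CLEVEL_MIN)
            return LZ4_compress_HC(src, dst, srcSize, dstCapacity, level);
        {   int const acceleration = (level < 0) ? -level + 1 : 1;
            return LZ4_compress_fast(src, dst, srcSize, dstCapacity, acceleration);
        }
    }

For example, compressWithLevel(src, dst, srcSize, dstCapacity, 9) takes the HC path, while level -3 takes the fast path with acceleration 4.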
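The reworked test in tests/fuzzer.c above pins down what LZ4_decompress_safe_partial() is expected to do on valid input: regenerate exactly the requested number of bytes and leave the rest of the output buffer untouched. A stand-alone sketch exercising the same three assertions (function and sample names are illustrative):

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>
    #include "lz4.h"

    /* Round-trips srcSize bytes, then decodes only the first targetSize bytes,
     * checking the same conditions as the new fuzzer test :
     * no error, exactly targetSize bytes regenerated, no write past targetSize. */
    static void partialDecodeCheck(const char* src, int srcSize, int targetSize)
    {
        int const cCapacity = LZ4_compressBound(srcSize);
        char* const cBuff = (char*)malloc((size_t)cCapacity);
        char* const dBuff = (char*)malloc((size_t)srcSize + 1);
        assert(cBuff != NULL && dBuff != NULL);
        assert(0 < targetSize && targetSize < srcSize);

        {   int const cSize = LZ4_compress_default(src, cBuff, srcSize, cCapacity);
            char const sentinel = (char)(src[targetSize] ^ 0x5A);
            assert(cSize > 0);
            dBuff[targetSize] = sentinel;   /* must remain untouched */

            {   int const dSize = LZ4_decompress_safe_partial(cBuff, dBuff,
                                                cSize, targetSize, srcSize);
                assert(dSize == targetSize);                       /* exactly the requested amount */
                assert(memcmp(src, dBuff, (size_t)targetSize) == 0);
                assert(dBuff[targetSize] == sentinel);             /* no overwrite beyond targetSize */
        }   }

        free(dBuff);
        free(cBuff);
    }

    int main(void)
    {
        const char sample[] = "abcabcabcabcabcabcabcabc-0123456789-abcabcabcabcabc";
        partialDecodeCheck(sample, (int)sizeof(sample), (int)sizeof(sample) / 2);
        return 0;
    }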
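The LZ4IO_compressFilename() change in programs/lz4io.c above reports CPU load as the ratio of CPU time (clock()) to wall-clock time. The same measurement can be reproduced outside the lz4 code base with standard interfaces; the sketch below assumes a POSIX system and uses clock_gettime() in place of the project's UTIL_* helpers, with a dummy loop standing in for the measured work:

    #include <stdio.h>
    #include <time.h>   /* clock, CLOCKS_PER_SEC, clock_gettime (POSIX.1-2001) */

    int main(void)
    {
        struct timespec wallStart, wallEnd;
        clock_t const cpuStart = clock();
        clock_gettime(CLOCK_MONOTONIC, &wallStart);

        {   /* placeholder for the work being measured */
            volatile unsigned long acc = 0;
            unsigned long i;
            for (i = 0; i < 50000000UL; i++) acc += i;
        }

        clock_gettime(CLOCK_MONOTONIC, &wallEnd);
        {   clock_t const cpuEnd = clock();
            double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
            double const wall_s = (double)(wallEnd.tv_sec - wallStart.tv_sec)
                                + (double)(wallEnd.tv_nsec - wallStart.tv_nsec) / 1000000000;
            printf("Completed in %.2f sec (cpu load : %.0f%%) \n",
                   wall_s, (cpuLoad_s / wall_s) * 100);
        }
        return 0;
    }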