From 34efbb2472543f187cc4a59a5fffa742f5b9a254 Mon Sep 17 00:00:00 2001 From: gonzo Date: Fri, 7 Mar 2025 21:26:59 +0100 Subject: [PATCH 01/18] Support for lz4 compression #163 --- pyproject.toml | 3 ++- src/xopen/__init__.py | 59 ++++++++++++++++++++++++++++++++++++++---- tests/file.txt.lz4 | Bin 0 -> 56 bytes tests/test_piped.py | 10 ++++++- tests/test_xopen.py | 6 +++++ 5 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 tests/file.txt.lz4 diff --git a/pyproject.toml b/pyproject.toml index 5522cd7..189ff63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,9 @@ dependencies = [ homepage = "https://github.com/pycompression/xopen/" [project.optional-dependencies] -dev = ["pytest"] +dev = ["pytest", "pytest-timeout"] zstd = ["zstandard<1"] +lz4 = ["lz4>=4.3.2"] [tool.setuptools_scm] write_to = "src/xopen/_version.py" diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 89f5137..8ac8a7f 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -43,6 +43,7 @@ XOPEN_DEFAULT_BZ2_COMPRESSION = 9 XOPEN_DEFAULT_XZ_COMPRESSION = 6 XOPEN_DEFAULT_ZST_COMPRESSION = 3 +XOPEN_DEFAULT_LZ4_COMPRESSION = 1 igzip: Optional[ModuleType] isal_zlib: Optional[ModuleType] @@ -70,6 +71,11 @@ except ImportError: zstandard = None # type: ignore +try: + import lz4.frame # type: ignore +except ImportError: + lz4 = None + try: import fcntl @@ -120,6 +126,7 @@ class _ProgramSettings: "zstd": _ProgramSettings(("zstd",), tuple(range(1, 20)), "-T"), "pigz": _ProgramSettings(("pigz", "--no-name"), tuple(range(0, 10)) + (11,), "-p"), "gzip": _ProgramSettings(("gzip", "--no-name"), tuple(range(1, 10))), + "lz4": _ProgramSettings(("lz4",), tuple(range(1, 13)), "-T"), } @@ -551,6 +558,41 @@ def _open_zst( return io.BufferedWriter(f) # mode "ab" and "wb" +def _open_lz4( + filename: FileOrPath, + mode: str, + compresslevel: Optional[int], + threads: Optional[int], +): + assert mode in ("rb", "ab", "wb") + if compresslevel is None: + compresslevel = 
XOPEN_DEFAULT_LZ4_COMPRESSION + + if threads != 0: + try: + # zstd can compress using multiple cores + program_args: Tuple[str, ...] = ("lz4",) + return _PipedCompressionProgram( + filename, + mode, + compresslevel, + threads, + program_settings=_PROGRAM_SETTINGS["lz4"], + ) + except OSError: + if lz4 is None: + # No fallback available + raise + + if lz4 is None: + raise ImportError("lz4 module not available") + f = lz4.frame.LZ4FrameFile(filename, mode, compression_level=compresslevel) + if "r" in mode: + return f + # Buffer writes on lz4.open to mitigate overhead of small writes + return io.BufferedWriter(f) + + def _open_gz( filename: FileOrPath, mode: str, @@ -683,6 +725,10 @@ def _detect_format_from_content(filename: FileOrPath) -> Optional[str]: elif bs[:4] == b"\x28\xb5\x2f\xfd": # https://datatracker.ietf.org/doc/html/rfc8478#section-3.1.1 return "zst" + elif bs[:4] == b"\x04\x22\x4d\x18": + # https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) + return "lz4" + return None finally: if closefd: @@ -694,7 +740,7 @@ def _detect_format_from_extension(filename: Union[str, bytes]) -> Optional[str]: Attempt to detect file format from the filename extension. Return None if no format could be detected. """ - for ext in ("bz2", "xz", "gz", "zst"): + for ext in ("bz2", "xz", "gz", "zst", "lz4"): if isinstance(filename, bytes): if filename.endswith(b"." + ext.encode()): return ext @@ -797,6 +843,7 @@ def xopen( # noqa: C901 - .bz2 uses bzip2 compression - .xz uses xz/lzma compression - .zst uses zstandard compression + - .lz4 uses lz4 compression - otherwise, no compression is used When reading, if a file name extension is available, the format is detected @@ -808,7 +855,7 @@ def xopen( # noqa: C901 compresslevel is the compression level for writing to gzip, xz and zst files. This parameter is ignored for the other compression formats. If set to None, a default depending on the format is used: - gzip: 6, xz: 6, zstd: 3. + gzip: 6, xz: 6, zstd: 3, lz4: 1. 
When threads is None (the default), compressed file formats are read or written using a pipe to a subprocess running an external tool such as, @@ -828,7 +875,7 @@ def xopen( # noqa: C901 format overrides the autodetection of input and output formats. This can be useful when compressed output needs to be written to a file without an - extension. Possible values are "gz", "xz", "bz2", "zst". + extension. Possible values are "gz", "xz", "bz2", "zst", "lz4". """ if mode in ("r", "w", "a"): mode += "t" # type: ignore @@ -844,10 +891,10 @@ def xopen( # noqa: C901 elif _file_is_a_socket_or_pipe(filename): filename = open(filename, binary_mode) # type: ignore - if format not in (None, "gz", "xz", "bz2", "zst"): + if format not in (None, "gz", "xz", "bz2", "zst", "lz4"): raise ValueError( f"Format not supported: {format}. " - f"Choose one of: 'gz', 'xz', 'bz2', 'zst'" + f"Choose one of: 'gz', 'xz', 'bz2', 'zst', 'lz4'." ) detected_format = format or _detect_format_from_extension(filepath) if detected_format is None and "r" in mode: @@ -861,6 +908,8 @@ def xopen( # noqa: C901 opened_file = _open_bz2(filename, binary_mode, compresslevel, threads) elif detected_format == "zst": opened_file = _open_zst(filename, binary_mode, compresslevel, threads) + elif detected_format == "lz4": + opened_file = _open_lz4(filename, binary_mode, compresslevel, threads) else: opened_file, _ = _file_or_path_to_binary_stream(filename, binary_mode) diff --git a/tests/file.txt.lz4 b/tests/file.txt.lz4 new file mode 100644 index 0000000000000000000000000000000000000000..5b2ed807723ae198c54923600209e2e9694c2692 GIT binary patch literal 56 zcmZQk@|8$&Sgy*zz_2JJwYVfRFI`8Wgp=U|yMmsc9#=?4szPyUa(-TlLQZC0svZ|m K9mD56#~J{$aS);a literal 0 HcmV?d00001 diff --git a/tests/test_piped.py b/tests/test_piped.py index 9f8afbe..eba903f 100644 --- a/tests/test_piped.py +++ b/tests/test_piped.py @@ -18,7 +18,7 @@ _ProgramSettings, ) -extensions = ["", ".gz", ".bz2", ".xz", ".zst"] +extensions = ["", ".gz", ".bz2", 
".xz", ".zst", ".lz4"] try: import fcntl @@ -57,16 +57,24 @@ def available_zstd_programs(): return [] +def available_lz4_programs(): + if shutil.which("lz4"): + return [_PROGRAM_SETTINGS["lz4"]] + return [] + + PIPED_GZIP_PROGRAMS = available_gzip_programs() PIPED_BZIP2_PROGRAMS = available_bzip2_programs() PIPED_XZ_PROGRAMS = available_xz_programs() PIPED_ZST_PROGRAMS = available_zstd_programs() +PIPED_LZ4_PROGRAMS = available_lz4_programs() ALL_PROGRAMS_WITH_EXTENSION = ( list(zip(PIPED_GZIP_PROGRAMS, cycle([".gz"]))) + list(zip(PIPED_BZIP2_PROGRAMS, cycle([".bz2"]))) + list(zip(PIPED_XZ_PROGRAMS, cycle([".xz"]))) + list(zip(PIPED_ZST_PROGRAMS, cycle([".zst"]))) + + list(zip(PIPED_LZ4_PROGRAMS, cycle([".lz4"]))) ) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 86234bc..ec337ef 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -23,6 +23,10 @@ except ImportError: zstandard = None +try: + import lz4.frame +except ImportError: + lz4 = None # TODO this is duplicated in test_piped.py TEST_DIR = Path(__file__).parent @@ -31,6 +35,8 @@ extensions = ["", ".gz", ".bz2", ".xz"] if shutil.which("zstd") or zstandard: extensions += [".zst"] +if shutil.which("lz4") or lz4: + extensions += [".lz4"] base = os.path.join(os.path.dirname(__file__), "file.txt") files = [base + ext for ext in extensions] From 00de271e03ac4f38c3b9eaf02d66befcc0b9ce77 Mon Sep 17 00:00:00 2001 From: gonzo Date: Sat, 8 Mar 2025 11:39:43 +0100 Subject: [PATCH 02/18] updates: - tox.ini to include lz4 tests - pyproject.toml to include dev dependencies - .github/workflows/ci.yml to include lz4 tests - tests to include lz4 tests --- .github/workflows/ci.yml | 8 ++++++-- .gitignore | 5 +++++ pyproject.toml | 21 +++++++++++++++------ src/xopen/__init__.py | 2 -- tests/test_xopen.py | 32 ++++++++++++++++++++++++++++++++ tox.ini | 5 +++++ 6 files changed, 63 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be3a90c..ee42ad4 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,15 +54,16 @@ jobs: optional-deps: false with-libs: false with-zstandard: true + with-lz4: true - os: windows-latest python-version: "3.10" steps: - name: Install optional tools macOS if: runner.os == 'macOS' && matrix.optional-deps - run: brew install pigz pbzip2 isa-l zstd + run: brew install pigz pbzip2 isa-l zstd lz4 - name: Install optional tools Linux if: runner.os == 'Linux' && matrix.optional-deps - run: sudo apt-get install pigz pbzip2 isal zstd + run: sudo apt-get install pigz pbzip2 isal zstd lz4 - name: Remove xz if: runner.os == 'Linux' && !matrix.optional-deps run: while which xz; do sudo rm $(which xz); done @@ -84,6 +85,9 @@ jobs: - name: Test with zstandard if: matrix.with-zstandard run: tox -e zstd + - name: Test with lz4 + if: matrix.with-lz4 + run: tox -e lz4 - name: Upload coverage report uses: codecov/codecov-action@v3 diff --git a/.gitignore b/.gitignore index 5c08169..47475b8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,8 @@ __pycache__/ .tox venv/ src/xopen/_version.py +settings.json +.coverage +coverage.xml +.vscode/* +.DS_Store diff --git a/pyproject.toml b/pyproject.toml index 189ff63..0d6e1f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,29 +5,38 @@ build-backend = "setuptools.build_meta" [project] name = "xopen" authors = [ - {name = "Marcel Martin", email = "marcel.martin@scilifelab.se"}, - {name = "Ruben Vorderman", email = "r.h.p.vorderman@lumc.nl"} + { name = "Marcel Martin", email = "marcel.martin@scilifelab.se" }, + { name = "Ruben Vorderman", email = "r.h.p.vorderman@lumc.nl" }, ] description = "Open compressed files transparently" readme = "README.rst" -license = {text = "MIT"} +license = { text = "MIT" } classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3" + "Programming Language :: Python :: 3", ] requires-python = ">=3.8" dynamic = ["version"] dependencies 
= [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"' + 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', ] [project.urls] homepage = "https://github.com/pycompression/xopen/" [project.optional-dependencies] -dev = ["pytest", "pytest-timeout"] +dev = [ + "pytest", + "pytest-timeout", + "tox", + "black", + "flake8", + "mypy", + "twine", + "setuptools_scm[toml]", +] zstd = ["zstandard<1"] lz4 = ["lz4>=4.3.2"] diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 8ac8a7f..7744ae3 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -570,8 +570,6 @@ def _open_lz4( if threads != 0: try: - # zstd can compress using multiple cores - program_args: Tuple[str, ...] = ("lz4",) return _PipedCompressionProgram( filename, mode, diff --git a/tests/test_xopen.py b/tests/test_xopen.py index ec337ef..9fe5736 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -115,6 +115,8 @@ def test_binary(fname): def test_roundtrip(ext, tmp_path, threads, mode): if ext == ".zst" and threads == 0 and zstandard is None: return + if ext == ".lz4" and threads == 0 and lz4 is None: + pytest.skip("lz4 not installed") path = tmp_path / f"file{ext}" data = b"Hello" if mode == "b" else "Hello" with xopen(path, "w" + mode, threads=threads) as f: @@ -126,6 +128,8 @@ def test_roundtrip(ext, tmp_path, threads, mode): def test_binary_no_isal_no_threads(fname, xopen_without_igzip): if fname.endswith(".zst") and zstandard is None: return + if fname.endswith(".lz4") and lz4 is None: + pytest.skip("lz4 not installed") with xopen_without_igzip(fname, "rb", threads=0) as f: lines = list(f) assert len(lines) == 2 @@ -276,6 +280,8 @@ def test_invalid_compression_level(tmp_path): def test_append(ext, threads, tmp_path): if ext == ".zst" and zstandard is 
None and threads == 0: pytest.skip("No zstandard installed") + if ext == ".lz4" and lz4 is None and threads == 0: + pytest.skip("No lz4 installed") text = b"AB" reference = text + text path = tmp_path / f"the-file{ext}" @@ -361,6 +367,10 @@ def test_read_no_threads(ext): } if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") + elif ext == ".lz4" and lz4: + klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(TEST_DIR / f"file.txt{ext}", "rb", threads=0) as f: assert isinstance(f, klass), f @@ -393,6 +403,12 @@ def test_write_no_threads(tmp_path, ext): # Skip zst because if python-zstandard is not installed, # we fall back to an external process even when threads=0 return + if ext == ".lz4" and lz4 is None: + # if lz4 is not installed, we skip this test + pytest.skip("lz4 not installed") + elif ext == ".lz4" and lz4: + # test if lz4 is installed + klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(tmp_path / f"out{ext}", "wb", threads=0) as f: if isinstance(f, io.BufferedWriter): @@ -534,6 +550,8 @@ def test_override_output_format_wrong_format(tmp_path): def test_text_encoding_newline_passthrough(opener, extension, tmp_path): if extension == ".zst" and zstandard is None: return + if extension == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") # "Eén ree\nTwee reeën\n" latin-1 encoded with \r for as line separator. encoded_text = b"E\xe9n ree\rTwee ree\xebn\r" path = tmp_path / f"test.txt{extension}" @@ -549,6 +567,8 @@ def test_text_encoding_newline_passthrough(opener, extension, tmp_path): def test_text_encoding_errors(opener, extension, tmp_path): if extension == ".zst" and zstandard is None: return + if extension == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") # "Eén ree\nTwee reeën\n" latin-1 encoded. This is not valid ascii. 
encoded_text = b"E\xe9n ree\nTwee ree\xebn\n" path = tmp_path / f"test.txt{extension}" @@ -605,6 +625,8 @@ def test_xopen_zst_long_window_size(threads): def test_pass_file_object_for_reading(ext, threads): if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") with open(TEST_DIR / f"file.txt{ext}", "rb") as fh: with xopen(fh, mode="rb", threads=threads) as f: @@ -633,6 +655,8 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): format = None if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) writer.write(first_line) @@ -648,6 +672,8 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): def test_xopen_stdin(monkeypatch, ext, threads): if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") # Add encoding to suppress encoding warnings with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file: monkeypatch.setattr("sys.stdin", in_file) @@ -671,6 +697,8 @@ def test_xopen_stdout(monkeypatch): def test_xopen_read_from_pipe(ext, threads): if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") in_file = TEST_DIR / f"file.txt{ext}" process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) with xopen(process.stdout, "rt", threads=threads) as f: @@ -684,6 +712,8 @@ def test_xopen_read_from_pipe(ext, threads): def test_xopen_write_to_pipe(threads, ext): if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") format = ext.lstrip(".") if format == "": format = None @@ -705,6 +735,8 @@ def test_xopen_write_to_pipe(threads, ext): def test_xopen_dev_stdin_read(threads, ext): if ext == ".zst" and zstandard is 
None: return + if ext == ".lz4" and lz4 is None: + pytest.skip("lz4 not installed") file = str(Path(__file__).parent / f"file.txt{ext}") result = subprocess.run( f"cat {file} | python -c 'import xopen; " diff --git a/tox.ini b/tox.ini index 1e063fb..b385ea8 100644 --- a/tox.ini +++ b/tox.ini @@ -21,6 +21,11 @@ deps = {[testenv]deps} zstandard +[testenv:lz4] +deps = + {[testenv]deps} + lz4 + [testenv:no-libs] commands= pip uninstall -y isal zlib-ng From 7ab5809475305a2d698328b238f2950c7f1a86a6 Mon Sep 17 00:00:00 2001 From: gonzo Date: Sat, 8 Mar 2025 11:48:23 +0100 Subject: [PATCH 03/18] update README.rst with lz4 changes --- README.rst | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b1039f0..852a370 100644 --- a/README.rst +++ b/README.rst @@ -27,6 +27,7 @@ Supported compression formats are: - bzip2 (``.bz2``) - xz (``.xz``) - Zstandard (``.zst``) (optional) +- lz4 (``.lz4``) (optional) ``xopen`` is compatible with Python versions 3.8 and later. @@ -73,7 +74,7 @@ The function opens the file using a function suitable for the detected file format and returns an open file-like object. When writing, the file format is chosen based on the file name extension: -``.gz``, ``.bz2``, ``.xz``, ``.zst``. This can be overriden with ``format``. +``.gz``, ``.bz2``, ``.xz``, ``.zst``, ``.lz4``. This can be overriden with ``format``. If the extension is not recognized, no compression is used. When reading and a file name extension is available, the format is detected @@ -101,13 +102,13 @@ preferred locale encoding. **compresslevel**: The compression level for writing to gzip, xz and Zstandard files. If set to None, a default depending on the format is used: -gzip: 1, xz: 6, Zstandard: 3. +gzip: 1, xz: 6, Zstandard: 3, lz4: 1. This parameter is ignored for other compression formats. **format**: Override the autodetection of the input or output format. 
-Possible values are: ``"gz"``, ``"xz"``, ``"bz2"``, ``"zst"``. +Possible values are: ``"gz"``, ``"xz"``, ``"bz2"``, ``"zst"``, ``"lz4"``. **threads**: Set the number of additional threads spawned for compression or decompression. @@ -180,6 +181,20 @@ program or the Python ``zstandard`` package needs to be installed. To ensure that you get the correct ``zstandard`` version, you can specify the ``zstd`` extra for ``xopen``, that is, install it using ``pip install xopen[zstd]``. +Optional lz4 support +-------------------------- + +For reading and writing lz4 (``.lz4``) files, either the ``lz4`` command-line +program or the Python ``lz4`` package needs to be installed. + +* If the ``threads`` parameter to ``xopen()`` is ``None`` (the default) or any value greater than 0, + ``xopen`` uses an external ``lz4`` process. +* If the above fails (because no ``lz4`` program is available) or if ``threads`` is 0, + the ``lz4`` package is used. + +To ensure that ``lz4`` is installed, you can specify the ``lz4`` extra for +``xopen``, that is, install it using ``pip install xopen[lz4]``. 
+ Changelog --------- From bcd85b4aff1dc67d70f65ad751b2b5c073998a69 Mon Sep 17 00:00:00 2001 From: gonzo Date: Mon, 10 Mar 2025 20:46:26 +0100 Subject: [PATCH 04/18] move `lz4` from optional dependency to dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0d6e1f7..84f4399 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', + 'lz4>=4.3.2; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', ] [project.urls] @@ -38,7 +39,6 @@ dev = [ "setuptools_scm[toml]", ] zstd = ["zstandard<1"] -lz4 = ["lz4>=4.3.2"] [tool.setuptools_scm] write_to = "src/xopen/_version.py" From 0d1a11490837eccfebad5c9809674be1f51718e7 Mon Sep 17 00:00:00 2001 From: gonzo Date: Mon, 10 Mar 2025 22:02:41 +0100 Subject: [PATCH 05/18] remove BufferWriter for lz4 --- src/xopen/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 7744ae3..8154c82 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -585,10 +585,7 @@ def _open_lz4( if lz4 is None: raise ImportError("lz4 module not available") f = lz4.frame.LZ4FrameFile(filename, mode, compression_level=compresslevel) - if "r" in mode: - return f - # Buffer writes on lz4.open to mitigate overhead of small writes - return io.BufferedWriter(f) + return f def _open_gz( @@ -761,7 +758,7 @@ def _file_or_path_to_binary_stream( # object is not binary, this will crash at a later point. return file_or_path, False # type: ignore raise TypeError( - f"Unsupported type for {file_or_path}, " f"{file_or_path.__class__.__name__}." 
+ f"Unsupported type for {file_or_path}, {file_or_path.__class__.__name__}." ) From 64e55de3c7fbd9134ff4c8e4e603771609770b1e Mon Sep 17 00:00:00 2001 From: gonzo Date: Tue, 11 Mar 2025 17:11:18 +0100 Subject: [PATCH 06/18] update tox to have lz4 as dependencie --- .github/workflows/ci.yml | 4 ---- .gitignore | 2 ++ pyproject.toml | 2 +- tests/test_xopen.py | 18 +++++++----------- tox.ini | 5 ----- 5 files changed, 10 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee42ad4..ca90ce6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,6 @@ jobs: optional-deps: false with-libs: false with-zstandard: true - with-lz4: true - os: windows-latest python-version: "3.10" steps: @@ -85,9 +84,6 @@ jobs: - name: Test with zstandard if: matrix.with-zstandard run: tox -e zstd - - name: Test with lz4 - if: matrix.with-lz4 - run: tox -e lz4 - name: Upload coverage report uses: codecov/codecov-action@v3 diff --git a/.gitignore b/.gitignore index 47475b8..1ff7a51 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ settings.json coverage.xml .vscode/* .DS_Store +build/* +dist/* diff --git a/pyproject.toml b/pyproject.toml index 84f4399..66a413d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'lz4>=4.3.2; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', + 'lz4>=4.4.3', ] [project.urls] diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 9fe5736..5211f0d 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -2,32 +2,28 @@ Tests for the xopen.xopen function """ import bz2 -import subprocess -import sys -import tempfile -from contextlib import contextmanager 
import functools import gzip import io import lzma import os -from pathlib import Path import shutil +import subprocess +import sys +import tempfile +from contextlib import contextmanager +from pathlib import Path +import lz4.frame import pytest -from xopen import xopen, _detect_format_from_content +from xopen import _detect_format_from_content, xopen try: import zstandard except ImportError: zstandard = None -try: - import lz4.frame -except ImportError: - lz4 = None - # TODO this is duplicated in test_piped.py TEST_DIR = Path(__file__).parent CONTENT_LINES = ["Testing, testing ...\n", "The second line.\n"] diff --git a/tox.ini b/tox.ini index b385ea8..1e063fb 100644 --- a/tox.ini +++ b/tox.ini @@ -21,11 +21,6 @@ deps = {[testenv]deps} zstandard -[testenv:lz4] -deps = - {[testenv]deps} - lz4 - [testenv:no-libs] commands= pip uninstall -y isal zlib-ng From 8e474a866aec103cd9cbf8ed8352cf9804781291 Mon Sep 17 00:00:00 2001 From: gonzo Date: Tue, 11 Mar 2025 17:16:41 +0100 Subject: [PATCH 07/18] set lz4 version to >=4.3.3 (minimun for py3.8) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 66a413d..94c3400 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'lz4>=4.4.3', + 'lz4>=4.3.3', ] [project.urls] From bf32deb558af29d661bae91b502442f275f38652 Mon Sep 17 00:00:00 2001 From: gonzo Date: Tue, 11 Mar 2025 17:24:19 +0100 Subject: [PATCH 08/18] remove lz4 as optional dependency from test_xopen.py --- tests/test_xopen.py | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 5211f0d..3d69497 100644 --- a/tests/test_xopen.py +++ 
b/tests/test_xopen.py @@ -111,8 +111,6 @@ def test_binary(fname): def test_roundtrip(ext, tmp_path, threads, mode): if ext == ".zst" and threads == 0 and zstandard is None: return - if ext == ".lz4" and threads == 0 and lz4 is None: - pytest.skip("lz4 not installed") path = tmp_path / f"file{ext}" data = b"Hello" if mode == "b" else "Hello" with xopen(path, "w" + mode, threads=threads) as f: @@ -124,8 +122,6 @@ def test_roundtrip(ext, tmp_path, threads, mode): def test_binary_no_isal_no_threads(fname, xopen_without_igzip): if fname.endswith(".zst") and zstandard is None: return - if fname.endswith(".lz4") and lz4 is None: - pytest.skip("lz4 not installed") with xopen_without_igzip(fname, "rb", threads=0) as f: lines = list(f) assert len(lines) == 2 @@ -276,8 +272,6 @@ def test_invalid_compression_level(tmp_path): def test_append(ext, threads, tmp_path): if ext == ".zst" and zstandard is None and threads == 0: pytest.skip("No zstandard installed") - if ext == ".lz4" and lz4 is None and threads == 0: - pytest.skip("No lz4 installed") text = b"AB" reference = text + text path = tmp_path / f"the-file{ext}" @@ -359,14 +353,11 @@ def test_read_no_threads(ext): ".gz": gzip.GzipFile, ".xz": lzma.LZMAFile, ".zst": io.BufferedReader, + ".lz4": lz4.frame.LZ4FrameFile, "": io.BufferedReader, } if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") - elif ext == ".lz4" and lz4: - klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(TEST_DIR / f"file.txt{ext}", "rb", threads=0) as f: assert isinstance(f, klass), f @@ -393,18 +384,13 @@ def test_write_no_threads(tmp_path, ext): ".bz2": bz2.BZ2File, ".gz": gzip.GzipFile, ".xz": lzma.LZMAFile, + ".lz4": lz4.frame.LZ4FrameFile, "": io.BufferedWriter, } if ext == ".zst": # Skip zst because if python-zstandard is not installed, # we fall back to an external process even when threads=0 return - if ext == ".lz4" and lz4 is None: - # if lz4 is not 
installed, we skip this test - pytest.skip("lz4 not installed") - elif ext == ".lz4" and lz4: - # test if lz4 is installed - klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(tmp_path / f"out{ext}", "wb", threads=0) as f: if isinstance(f, io.BufferedWriter): @@ -546,8 +532,6 @@ def test_override_output_format_wrong_format(tmp_path): def test_text_encoding_newline_passthrough(opener, extension, tmp_path): if extension == ".zst" and zstandard is None: return - if extension == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") # "Eén ree\nTwee reeën\n" latin-1 encoded with \r for as line separator. encoded_text = b"E\xe9n ree\rTwee ree\xebn\r" path = tmp_path / f"test.txt{extension}" @@ -563,8 +547,6 @@ def test_text_encoding_newline_passthrough(opener, extension, tmp_path): def test_text_encoding_errors(opener, extension, tmp_path): if extension == ".zst" and zstandard is None: return - if extension == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") # "Eén ree\nTwee reeën\n" latin-1 encoded. This is not valid ascii. 
encoded_text = b"E\xe9n ree\nTwee ree\xebn\n" path = tmp_path / f"test.txt{extension}" @@ -621,9 +603,6 @@ def test_xopen_zst_long_window_size(threads): def test_pass_file_object_for_reading(ext, threads): if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") - with open(TEST_DIR / f"file.txt{ext}", "rb") as fh: with xopen(fh, mode="rb", threads=threads) as f: assert f.readline() == CONTENT_LINES[0].encode("utf-8") @@ -651,8 +630,6 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): format = None if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) writer.write(first_line) @@ -668,8 +645,6 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): def test_xopen_stdin(monkeypatch, ext, threads): if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") # Add encoding to suppress encoding warnings with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file: monkeypatch.setattr("sys.stdin", in_file) @@ -693,8 +668,6 @@ def test_xopen_stdout(monkeypatch): def test_xopen_read_from_pipe(ext, threads): if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") in_file = TEST_DIR / f"file.txt{ext}" process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) with xopen(process.stdout, "rt", threads=threads) as f: @@ -708,8 +681,6 @@ def test_xopen_read_from_pipe(ext, threads): def test_xopen_write_to_pipe(threads, ext): if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") format = ext.lstrip(".") if format == "": format = None @@ -731,8 +702,6 @@ def test_xopen_write_to_pipe(threads, ext): def 
test_xopen_dev_stdin_read(threads, ext): if ext == ".zst" and zstandard is None: return - if ext == ".lz4" and lz4 is None: - pytest.skip("lz4 not installed") file = str(Path(__file__).parent / f"file.txt{ext}") result = subprocess.run( f"cat {file} | python -c 'import xopen; " From 937629808d0f1dbe85ce3f822ee6a807a63e03bf Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 13 Mar 2025 09:54:35 +0100 Subject: [PATCH 09/18] set lz4 compression levels aligned with python lz4 [0-16], as CLi accepts those values without problem. --- src/xopen/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 8154c82..6996062 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -43,7 +43,7 @@ XOPEN_DEFAULT_BZ2_COMPRESSION = 9 XOPEN_DEFAULT_XZ_COMPRESSION = 6 XOPEN_DEFAULT_ZST_COMPRESSION = 3 -XOPEN_DEFAULT_LZ4_COMPRESSION = 1 +XOPEN_DEFAULT_LZ4_COMPRESSION = 0 igzip: Optional[ModuleType] isal_zlib: Optional[ModuleType] @@ -126,7 +126,7 @@ class _ProgramSettings: "zstd": _ProgramSettings(("zstd",), tuple(range(1, 20)), "-T"), "pigz": _ProgramSettings(("pigz", "--no-name"), tuple(range(0, 10)) + (11,), "-p"), "gzip": _ProgramSettings(("gzip", "--no-name"), tuple(range(1, 10))), - "lz4": _ProgramSettings(("lz4",), tuple(range(1, 13)), "-T"), + "lz4": _ProgramSettings(("lz4",), tuple(range(0, 17)), "-T"), } From d3992d94628e46bfeba64fa5a5e8f526af943e6d Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 27 Mar 2025 13:10:43 +0100 Subject: [PATCH 10/18] resolve comments from PR --- .gitignore | 7 ------- README.rst | 19 ++++--------------- pyproject.toml | 21 ++++++--------------- src/xopen/__init__.py | 4 ++-- tests/test_xopen.py | 4 +--- 5 files changed, 13 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 1ff7a51..5c08169 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,3 @@ __pycache__/ .tox venv/ src/xopen/_version.py -settings.json -.coverage -coverage.xml -.vscode/* 
-.DS_Store -build/* -dist/* diff --git a/README.rst b/README.rst index 852a370..1a9dc24 100644 --- a/README.rst +++ b/README.rst @@ -26,8 +26,8 @@ Supported compression formats are: - gzip (``.gz``) - bzip2 (``.bz2``) - xz (``.xz``) +- lz4 (``.lz4``) - Zstandard (``.zst``) (optional) -- lz4 (``.lz4``) (optional) ``xopen`` is compatible with Python versions 3.8 and later. @@ -141,6 +141,9 @@ built-in support for multithreaded compression. For bz2 files, `pbzip2 (parallel bzip2) `_ is used. +For lz4 files, [python lz4](https://python-lz4.readthedocs.io/en/stable/index.html) +package is used. + ``xopen`` falls back to Python’s built-in functions (``gzip.open``, ``lzma.open``, ``bz2.open``) if none of the other methods can be used. @@ -181,20 +184,6 @@ program or the Python ``zstandard`` package needs to be installed. To ensure that you get the correct ``zstandard`` version, you can specify the ``zstd`` extra for ``xopen``, that is, install it using ``pip install xopen[zstd]``. -Optional lz4 support --------------------------- - -For reading and writing lz4 (``.lz4``) files, either the ``lz4`` command-line -program or the Python ``lz4`` package needs to be installed. - -* If the ``threads`` parameter to ``xopen()`` is ``None`` (the default) or any value greater than 0, - ``xopen`` uses an external ``lz4`` process. -* If the above fails (because no ``lz4`` program is available) or if ``threads`` is 0, - the ``lz4`` package is used. - -To ensure that ``lz4`` is installed, you can specify the ``lz4`` extra for -``xopen``, that is, install it using ``pip install xopen[lz4]``. 
- Changelog --------- diff --git a/pyproject.toml b/pyproject.toml index 94c3400..42b281b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,39 +5,30 @@ build-backend = "setuptools.build_meta" [project] name = "xopen" authors = [ - { name = "Marcel Martin", email = "marcel.martin@scilifelab.se" }, - { name = "Ruben Vorderman", email = "r.h.p.vorderman@lumc.nl" }, + {name = "Marcel Martin", email = "marcel.martin@scilifelab.se"}, + {name = "Ruben Vorderman", email = "r.h.p.vorderman@lumc.nl"} ] description = "Open compressed files transparently" readme = "README.rst" -license = { text = "MIT" } +license = {text = "MIT"} classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3" ] requires-python = ">=3.8" dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'lz4>=4.3.3', + 'lz4>4.3.1' ] [project.urls] homepage = "https://github.com/pycompression/xopen/" [project.optional-dependencies] -dev = [ - "pytest", - "pytest-timeout", - "tox", - "black", - "flake8", - "mypy", - "twine", - "setuptools_scm[toml]", -] +dev = ["pytest"] zstd = ["zstandard<1"] [tool.setuptools_scm] diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 6996062..472e1b1 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -126,7 +126,7 @@ class _ProgramSettings: "zstd": _ProgramSettings(("zstd",), tuple(range(1, 20)), "-T"), "pigz": _ProgramSettings(("pigz", "--no-name"), tuple(range(0, 10)) + (11,), "-p"), "gzip": _ProgramSettings(("gzip", "--no-name"), tuple(range(1, 10))), - "lz4": _ProgramSettings(("lz4",), tuple(range(0, 17)), "-T"), + "lz4": _ProgramSettings(("lz4",), tuple(range(0, 17))), } @@ -850,7 +850,7 @@ def xopen( # noqa: C901 
compresslevel is the compression level for writing to gzip, xz and zst files. This parameter is ignored for the other compression formats. If set to None, a default depending on the format is used: - gzip: 6, xz: 6, zstd: 3, lz4: 1. + gzip: 6, xz: 6, zstd: 3, lz4: 0. When threads is None (the default), compressed file formats are read or written using a pipe to a subprocess running an external tool such as, diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 3d69497..31562df 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -28,11 +28,9 @@ TEST_DIR = Path(__file__).parent CONTENT_LINES = ["Testing, testing ...\n", "The second line.\n"] CONTENT = "".join(CONTENT_LINES) -extensions = ["", ".gz", ".bz2", ".xz"] +extensions = ["", ".gz", ".bz2", ".xz", ".lz4"] if shutil.which("zstd") or zstandard: extensions += [".zst"] -if shutil.which("lz4") or lz4: - extensions += [".lz4"] base = os.path.join(os.path.dirname(__file__), "file.txt") files = [base + ext for ext in extensions] From 0a3bacd461b53f607f1f622bceb1dca083d4aca1 Mon Sep 17 00:00:00 2001 From: gonzo Date: Sat, 7 Jun 2025 17:17:53 +0200 Subject: [PATCH 11/18] fixes for pypy tests - conditionally import lz4 - manage missing lz4 on tests - add lz4 conditionally to pyproject.toml --- pyproject.toml | 2 +- src/xopen/__init__.py | 24 +++++++++++++++--------- tests/test_xopen.py | 17 ++++++++++++++--- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 42b281b..1c06eb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'lz4>4.3.1' + 'lz4>4.3.1; platform_python_implementation != "PyPy"', ] [project.urls] homepage = "https://github.com/pycompression/xopen/" [project.optional-dependencies] diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index
472e1b1..be7f935 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -568,7 +568,12 @@ def _open_lz4( if compresslevel is None: compresslevel = XOPEN_DEFAULT_LZ4_COMPRESSION - if threads != 0: + if lz4 is not None and (mode == "rb" or (mode in ("ab", "wb") and threads == 0)): + # use Python bindings + f = lz4.frame.LZ4FrameFile(filename, mode, compression_level=compresslevel) + return f + else: + # use CLI program try: return _PipedCompressionProgram( filename, @@ -578,14 +583,15 @@ def _open_lz4( program_settings=_PROGRAM_SETTINGS["lz4"], ) except OSError: - if lz4 is None: - # No fallback available - raise - - if lz4 is None: - raise ImportError("lz4 module not available") - f = lz4.frame.LZ4FrameFile(filename, mode, compression_level=compresslevel) - return f + _program_settings = _PROGRAM_SETTINGS["lz4"] + _program_settings.threads_flag = None + return _PipedCompressionProgram( + filename, + mode, + compresslevel, + threads, + program_settings=_program_settings, + ) def _open_gz( diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 31562df..7fce912 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -14,11 +14,15 @@ from contextlib import contextmanager from pathlib import Path -import lz4.frame + import pytest from xopen import _detect_format_from_content, xopen +try: + import lz4.frame +except ImportError: + lz4 = None try: import zstandard except ImportError: @@ -351,11 +355,14 @@ def test_read_no_threads(ext): ".gz": gzip.GzipFile, ".xz": lzma.LZMAFile, ".zst": io.BufferedReader, - ".lz4": lz4.frame.LZ4FrameFile, "": io.BufferedReader, } if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None: + return + if ext == ".lz4" and lz4.frame is not None: + klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(TEST_DIR / f"file.txt{ext}", "rb", threads=0) as f: assert isinstance(f, klass), f @@ -382,13 +389,17 @@ def test_write_no_threads(tmp_path, ext): ".bz2": bz2.BZ2File, ".gz": 
gzip.GzipFile, ".xz": lzma.LZMAFile, - ".lz4": lz4.frame.LZ4FrameFile, "": io.BufferedWriter, } if ext == ".zst": # Skip zst because if python-zstandard is not installed, # we fall back to an external process even when threads=0 return + if ext == ".lz4" and lz4 is None: + # Skip lz4 if lz4 is not installed + return + if ext == ".lz4" and lz4.frame is not None: + klasses[".lz4"] = lz4.frame.LZ4FrameFile klass = klasses[ext] with xopen(tmp_path / f"out{ext}", "wb", threads=0) as f: if isinstance(f, io.BufferedWriter): From ea4d53389a18f232c539e065e7d131391d185e91 Mon Sep 17 00:00:00 2001 From: gonzo Date: Sat, 7 Jun 2025 18:59:31 +0200 Subject: [PATCH 12/18] fix pypy tests on filelike objects --- tests/test_xopen.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 7fce912..1b9107c 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -639,6 +639,8 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): format = None if ext == ".zst" and zstandard is None: return + if ext == ".lz4" and lz4 is None and threads == 0: + return first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) writer.write(first_line) From 2ee242dcd2292c7e8a90fdab2e567108c69a76bb Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 12 Jun 2025 11:45:20 +0200 Subject: [PATCH 13/18] avoid failing tests on lz4 --- tests/test_xopen.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index f8a5396..2a7152d 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -655,6 +655,9 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): return if ext == ".lz4" and lz4 is None and threads == 0: return + if ext == ".lz4" and threads != 0: + # _PipedCompressionProgram not working on write mode + return first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) writer.write(first_line) From 
e5194f43c463adad766cb25f267b5d0dd9024665 Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 12 Jun 2025 13:43:18 +0200 Subject: [PATCH 14/18] tmp_path failing on windows --- tests/test_xopen.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 2a7152d..8472ced 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -114,6 +114,8 @@ def test_roundtrip(ext, tmp_path, threads, mode): if ext == ".zst" and threads == 0 and zstandard is None: return path = tmp_path / f"file{ext}" + if not path.parent.exists(): + path.mkdir() data = b"Hello" if mode == "b" else "Hello" with xopen(path, "w" + mode, threads=threads) as f: f.write(data) From f59fa8456e93a12d47ce3f9170e21e30bb991278 Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 12 Jun 2025 14:11:44 +0200 Subject: [PATCH 15/18] check if lz4 is present in the system. avoid errors on windows --- tests/test_xopen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 8472ced..e820343 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -113,9 +113,9 @@ def test_binary(fname): def test_roundtrip(ext, tmp_path, threads, mode): if ext == ".zst" and threads == 0 and zstandard is None: return + if ext == ".lz4" and shutil.which("lz4") is None: + return path = tmp_path / f"file{ext}" - if not path.parent.exists(): - path.mkdir() data = b"Hello" if mode == "b" else "Hello" with xopen(path, "w" + mode, threads=threads) as f: f.write(data) From 15d59db548552d7d75fbe95a5da2a1dc50438156 Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 12 Jun 2025 14:37:17 +0200 Subject: [PATCH 16/18] check if lz4 is present in the system. 
avoid errors on windows --- tests/test_xopen.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index e820343..3b8c8d2 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -114,7 +114,7 @@ def test_roundtrip(ext, tmp_path, threads, mode): if ext == ".zst" and threads == 0 and zstandard is None: return if ext == ".lz4" and shutil.which("lz4") is None: - return + pytest.skip("lz4 not installed") path = tmp_path / f"file{ext}" data = b"Hello" if mode == "b" else "Hello" with xopen(path, "w" + mode, threads=threads) as f: @@ -209,6 +209,8 @@ def test_next(fname): def test_has_iter_method(ext, tmp_path): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") path = tmp_path / f"out{ext}" with xopen(path, mode="w") as f: # Writing anything isn’t strictly necessary, but if we don’t, then @@ -276,6 +278,8 @@ def test_invalid_compression_level(tmp_path): def test_append(ext, threads, tmp_path): if ext == ".zst" and zstandard is None and threads == 0: pytest.skip("No zstandard installed") + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") text = b"AB" reference = text + text path = tmp_path / f"the-file{ext}" @@ -292,6 +296,8 @@ def test_append(ext, threads, tmp_path): @pytest.mark.parametrize("ext", extensions) def test_append_text(ext, tmp_path): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") text = "AB" reference = text + text path = tmp_path / f"the-file{ext}" @@ -385,6 +391,8 @@ def test_read_no_threads(ext): def test_write_threads(tmp_path, ext): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") path = tmp_path / f"out.{ext}" with xopen(path, mode="w", threads=3) as f: f.write("hello") @@ -465,6 +473,8 @@ def test_read_pathlib_binary(fname): def test_write_pathlib(ext, tmp_path): + if ext == ".lz4" and shutil.which("lz4") is None: + 
pytest.skip("lz4 not installed") path = tmp_path / f"hello.txt{ext}" with xopen(path, mode="wt") as f: f.write("hello") @@ -473,6 +483,8 @@ def test_write_pathlib(ext, tmp_path): def test_write_pathlib_binary(ext, tmp_path): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") path = tmp_path / f"hello.txt{ext}" with xopen(path, mode="wb") as f: f.write(b"hello") @@ -510,6 +522,8 @@ def test_falls_back_to_lzma_open(lacking_xz_permissions): def test_open_many_writers(tmp_path, ext): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") files = [] # Because lzma.open allocates a lot of memory, # open fewer files to avoid MemoryError on 32-bit architectures @@ -555,6 +569,8 @@ def test_override_output_format_wrong_format(tmp_path): @pytest.mark.parametrize("opener", OPENERS) @pytest.mark.parametrize("extension", extensions) def test_text_encoding_newline_passthrough(opener, extension, tmp_path): + if extension == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") if extension == ".zst" and zstandard is None: return # "Eén ree\nTwee reeën\n" latin-1 encoded with \r for as line separator. @@ -570,6 +586,8 @@ def test_text_encoding_newline_passthrough(opener, extension, tmp_path): @pytest.mark.parametrize("opener", OPENERS) @pytest.mark.parametrize("extension", extensions) def test_text_encoding_errors(opener, extension, tmp_path): + if extension == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") if extension == ".zst" and zstandard is None: return # "Eén ree\nTwee reeën\n" latin-1 encoded. This is not valid ascii. 
@@ -709,6 +727,8 @@ def test_xopen_read_from_pipe(ext, threads): @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_write_to_pipe(threads, ext): + if ext == ".lz4" and shutil.which("lz4") is None: + pytest.skip("lz4 not installed") if ext == ".zst" and zstandard is None: return format = ext.lstrip(".") From 2aea1caff26192bc55868eb13be23988ed7b9002 Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 12 Jun 2025 14:41:16 +0200 Subject: [PATCH 17/18] add `pytest.skip` to skip tests that require lz4 --- tests/test_xopen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 3b8c8d2..8f1a21b 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -674,10 +674,10 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): if ext == ".zst" and zstandard is None: return if ext == ".lz4" and lz4 is None and threads == 0: - return + pytest.skip("lz4 not working for BytesIO in piped write mode") if ext == ".lz4" and threads != 0: # _PipedCompressionProgram not working on write mode - return + pytest.skip("lz4 not working for BytesIO in piped write mode") first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) writer.write(first_line) From 4c18c24a98e100e6acefd48b832226c37b80aa0a Mon Sep 17 00:00:00 2001 From: gnzsnz <8376642+gnzsnz@users.noreply.github.com> Date: Wed, 23 Jul 2025 13:28:09 +0200 Subject: [PATCH 18/18] fix redundant else and magic reference --- src/xopen/__init__.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index be7f935..5980fde 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -572,26 +572,25 @@ def _open_lz4( # use Python bindings f = lz4.frame.LZ4FrameFile(filename, mode, compression_level=compresslevel) return f - else: - # use CLI program - try: - return _PipedCompressionProgram( - filename, - 
mode, - compresslevel, - threads, - program_settings=_PROGRAM_SETTINGS["lz4"], - ) - except OSError: - _program_settings = _PROGRAM_SETTINGS["lz4"] - _program_settings.threads_flag = None - return _PipedCompressionProgram( - filename, - mode, - compresslevel, - threads, - program_settings=_program_settings, - ) + # use CLI program + try: + return _PipedCompressionProgram( + filename, + mode, + compresslevel, + threads, + program_settings=_PROGRAM_SETTINGS["lz4"], + ) + except OSError: + _program_settings = _PROGRAM_SETTINGS["lz4"] + _program_settings.threads_flag = None + return _PipedCompressionProgram( + filename, + mode, + compresslevel, + threads, + program_settings=_program_settings, + ) def _open_gz( @@ -727,7 +726,7 @@ def _detect_format_from_content(filename: FileOrPath) -> Optional[str]: # https://datatracker.ietf.org/doc/html/rfc8478#section-3.1.1 return "zst" elif bs[:4] == b"\x04\x22\x4d\x18": - # https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) + # https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md return "lz4" return None