11"""Test the pypdf.filters module."""
2+
3+ import importlib .util
24import os
35import shutil
46import string
57import subprocess
68from io import BytesIO
79from itertools import product as cartesian_product
810from pathlib import Path
11+ from typing import cast
912
10- import brotli
1113import pytest
1214from PIL import Image , ImageOps
1315
1416from pypdf import PdfReader
17+ from pypdf .constants import FilterTypeAbbreviations as FTA
18+ from pypdf .constants import FilterTypes as FT
19+ from pypdf .constants import StreamAttributes as SA
1520from pypdf .errors import DeprecationError , PdfReadError
1621from pypdf .filters import (
1722 ASCII85Decode ,
2429)
2530from pypdf .generic import ArrayObject , DictionaryObject , IndirectObject , NameObject , NumberObject
2631
27- from typing import cast
28-
29- from pypdf .constants import FilterTypeAbbreviations as FTA
30- from pypdf .constants import FilterTypes as FT
31- from pypdf .constants import StreamAttributes as SA
3232from . import PILContext , get_data_from_url
3333from .test_encryption import HAS_AES
3434from .test_images import image_similarity
4949
5050
5151# Helper function for subprocess testing without brotli
52- def _run_script_without_brotli (tmp_path , script_content ):
52+ def _run_script_without_brotli (tmp_path , script_content ) -> None :
5353 env = os .environ .copy ()
54- env ["COVERAGE_PROCESS_START" ] = str (PROJECT_ROOT / "pyproject.toml" ) # Ensure coverage
54+ env ["COVERAGE_PROCESS_START" ] = str (PROJECT_ROOT / "pyproject.toml" ) # Ensure coverage
5555
5656 source_file = tmp_path / "script_no_brotli.py"
5757 source_file .write_text (script_content )
@@ -61,25 +61,23 @@ def _run_script_without_brotli(tmp_path, script_content):
6161 except KeyError :
6262 env ["PYTHONPATH" ] = str (PROJECT_ROOT )
6363
64- result = subprocess .run (
64+ result = subprocess .run ( # noqa: S603
6565 [shutil .which ("python" ), source_file ],
6666 capture_output = True ,
6767 env = env ,
68- cwd = PROJECT_ROOT , # Run from project root
68+ cwd = PROJECT_ROOT , # Run from project root
6969 )
7070 # Check stderr for unexpected errors from the subprocess itself
7171 if result .stderr :
72- print ( f"Subprocess stderr: \n { result . stderr . decode () } " ) # Print stderr for debugging
72+ pass # Print removed for committed code
7373 assert result .returncode == 0 , f"Subprocess failed with exit code { result .returncode } "
7474 # Allow specific stdout messages if needed, otherwise assert empty
7575 # assert result.stdout == b"", "Subprocess produced unexpected stdout"
7676 # Allow specific stderr messages if needed, otherwise assert empty
7777 # assert result.stderr == b"", "Subprocess produced unexpected stderr"
7878
7979
80- @pytest .mark .parametrize (
81- ("predictor" , "s" ), list (cartesian_product ([1 ], filter_inputs ))
82- )
80+ @pytest .mark .parametrize (("predictor" , "s" ), list (cartesian_product ([1 ], filter_inputs )))
8381def test_flate_decode_encode (predictor , s ):
8482 """FlateDecode encode() and decode() methods work as expected."""
8583 codec = FlateDecode ()
@@ -314,17 +312,13 @@ class Pdf:
314312 def get_object (self , reference ) -> NumberObject :
315313 return NumberObject (42 )
316314
317- parameters = CCITTFaxDecode ._get_parameters (
318- parameters = None , rows = IndirectObject (13 , 1 , Pdf ())
319- )
315+ parameters = CCITTFaxDecode ._get_parameters (parameters = None , rows = IndirectObject (13 , 1 , Pdf ()))
320316 assert parameters .rows == 42
321317
322318
323319def test_ccitt_fax_decode ():
324320 data = b""
325- parameters = DictionaryObject (
326- {"/K" : NumberObject (- 1 ), "/Columns" : NumberObject (17 )}
327- )
321+ parameters = DictionaryObject ({"/K" : NumberObject (- 1 ), "/Columns" : NumberObject (17 )})
328322
329323 # This was just the result pypdf 1.27.9 returned.
330324 # It would be awesome if we could check if that is actually correct.
@@ -402,10 +396,7 @@ def test_image_without_pillow(tmp_path):
402396 )
403397 assert result .returncode == 0
404398 assert result .stdout == b""
405- assert (
406- result .stderr .replace (b"\r " , b"" )
407- == b"Superfluous whitespace found in object header b'4' b'0'\n "
408- )
399+ assert result .stderr .replace (b"\r " , b"" ) == b"Superfluous whitespace found in object header b'4' b'0'\n "
409400
410401
411402@pytest .mark .enable_socket
@@ -448,9 +439,7 @@ def test_png_transparency_reverse():
448439 """Cf issue #1599"""
449440 pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf"
450441 reader = PdfReader (pdf_path )
451- refimg = Image .open (
452- BytesIO (get_data_from_url (name = "labeled-edges-center-image.png" ))
453- )
442+ refimg = Image .open (BytesIO (get_data_from_url (name = "labeled-edges-center-image.png" )))
454443 data = reader .pages [0 ].images [0 ]
455444 img = Image .open (BytesIO (data .data ))
456445 assert ".jp2" in data .name
@@ -491,9 +480,7 @@ def test_rgba():
491480 reader = PdfReader (BytesIO (get_data_from_url (name = "tika-972174.pdf" )))
492481 data = reader .pages [0 ].images [0 ]
493482 assert ".jp2" in data .name
494- similarity = image_similarity (
495- data .image , BytesIO (get_data_from_url (name = "tika-972174_p0-im0.png" ))
496- )
483+ similarity = image_similarity (data .image , BytesIO (get_data_from_url (name = "tika-972174_p0-im0.png" )))
497484 assert similarity > 0.99
498485
499486
@@ -793,11 +780,11 @@ def test_main_decode_brotli_without_brotli_installed_subprocess(tmp_path):
793780 # We need brotli here in the main process to create the test data
794781 try :
795782 import brotli
783+
796784 compressed_data = brotli .compress (original_data )
797785 except ImportError :
798786 pytest .skip ("brotli library not installed in the main test environment" )
799787
800-
801788 script = f"""
802789import sys
803790import pytest
@@ -830,9 +817,7 @@ def test_main_decode_brotli_without_brotli_installed_subprocess(tmp_path):
830817# Renamed from test_main_decode_brotli
831818def test_main_decode_brotli_installed ():
832819 """Test the main decode function with Brotli filter using a real PDF."""
833- try :
834- import brotli
835- except ImportError :
820+ if importlib .util .find_spec ("brotli" ) is None :
836821 pytest .skip ("brotli library not installed" )
837822
838823 # Use the prototype PDF provided by PDF Association
@@ -847,14 +832,13 @@ def test_main_decode_brotli_installed():
847832 content_stream_ref = page [NameObject ("/Contents" )]
848833 # Handle cases where /Contents might be an array
849834 if isinstance (content_stream_ref , ArrayObject ):
850- # For simplicity, let's assume the first stream in the array uses Brotli
851- # A more robust test might check all streams or find one specifically with /BrotliDecode
852- if not content_stream_ref :
853- pytest .skip ("Content stream array is empty." )
854- stream_obj = content_stream_ref [0 ].get_object ()
835+ # For simplicity, let's assume the first stream in the array uses Brotli
836+ # A more robust test might check all streams or find one specifically with /BrotliDecode
837+ if not content_stream_ref :
838+ pytest .skip ("Content stream array is empty." )
839+ stream_obj = content_stream_ref [0 ].get_object ()
855840 else :
856- stream_obj = content_stream_ref .get_object ()
857-
841+ stream_obj = content_stream_ref .get_object ()
858842
859843 # Check if the stream actually uses BrotliDecode
860844 filters = stream_obj .get (SA .FILTER , ())
@@ -864,11 +848,11 @@ def test_main_decode_brotli_installed():
864848 filters = (filters ,)
865849
866850 if FT .BROTLI_DECODE not in filters and FTA .BR not in filters :
867- pytest .skip ("Selected stream does not use BrotliDecode filter." )
868-
851+ pytest .skip ("Selected stream does not use BrotliDecode filter." )
869852
870853 # Call the main decode function directly on the stream object
871854 from pypdf import filters
855+
872856 try :
873857 decoded_data = filters .decode_stream_data (stream_obj )
874858 except Exception as e :
0 commit comments