From 24560c53cd3d65a6151afc9defd8560d09678c2b Mon Sep 17 00:00:00 2001
From: Philippe Prados <github@prados.fr>
Date: Mon, 10 Mar 2025 09:50:00 +0100
Subject: [PATCH 1/2] Fix issue 30098
 https://github.com/langchain-ai/langchain/issues/30098

---
 .../langchain_community/document_loaders/parsers/pdf.py   | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index 782edddad44af..904d74756e316 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -429,6 +429,7 @@ def extract_images_from_page(self, page: pypdf._page.PageObject) -> str:
         if not self.images_parser:
             return ""
         from PIL import Image
+        import pypdf
 
         if "/XObject" not in cast(dict, page["/Resources"]).keys():
             return ""
@@ -438,13 +439,16 @@ def extract_images_from_page(self, page: pypdf._page.PageObject) -> str:
         for obj in xObject:
             np_image: Any = None
             if xObject[obj]["/Subtype"] == "/Image":
-                if xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITHOUT_LOSS:
+                img_filter = xObject[obj]["/Filter"][1:] if type(
+                    xObject[obj]["/Filter"]) == pypdf.generic._base.NameObject else \
+                    xObject[obj]["/Filter"][0][1:]
+                if img_filter in _PDF_FILTER_WITHOUT_LOSS:
                     height, width = xObject[obj]["/Height"], xObject[obj]["/Width"]
 
                     np_image = np.frombuffer(
                         xObject[obj].get_data(), dtype=np.uint8
                     ).reshape(height, width, -1)
-                elif xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITH_LOSS:
+                elif img_filter in _PDF_FILTER_WITH_LOSS:
                     np_image = np.array(Image.open(io.BytesIO(xObject[obj].get_data())))
 
                 else:

From 5f88c28f95926e10428e2fb318235c4f3b0c8777 Mon Sep 17 00:00:00 2001
From: Philippe Prados <github@prados.fr>
Date: Wed, 26 Mar 2025 08:38:06 +0100
Subject: [PATCH 2/2] Fix bug 30098

---
 .../document_loaders/parsers/pdf.py                    | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index 904d74756e316..7824181b9e7f8 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -428,8 +428,8 @@ def extract_images_from_page(self, page: pypdf._page.PageObject) -> str:
         """
         if not self.images_parser:
             return ""
-        from PIL import Image
         import pypdf
+        from PIL import Image
 
         if "/XObject" not in cast(dict, page["/Resources"]).keys():
             return ""
@@ -439,9 +439,11 @@ def extract_images_from_page(self, page: pypdf._page.PageObject) -> str:
         for obj in xObject:
             np_image: Any = None
             if xObject[obj]["/Subtype"] == "/Image":
-                img_filter = xObject[obj]["/Filter"][1:] if type(
-                    xObject[obj]["/Filter"]) == pypdf.generic._base.NameObject else \
-                    xObject[obj]["/Filter"][0][1:]
+                img_filter = (
+                    xObject[obj]["/Filter"][1:]
+                    if type(xObject[obj]["/Filter"]) is pypdf.generic._base.NameObject
+                    else xObject[obj]["/Filter"][0][1:]
+                )
                 if img_filter in _PDF_FILTER_WITHOUT_LOSS:
                     height, width = xObject[obj]["/Height"], xObject[obj]["/Width"]