diff --git a/libs/community/langchain_community/document_loaders/pdf.py b/libs/community/langchain_community/document_loaders/pdf.py
index 6b51e481..3d118913 100644
--- a/libs/community/langchain_community/document_loaders/pdf.py
+++ b/libs/community/langchain_community/document_loaders/pdf.py
@@ -22,6 +22,7 @@
 from urllib.parse import urlparse
 
 import requests
+from langchain_core._api import deprecated
 from langchain_core.documents import Document
 from langchain_core.utils import get_from_dict_or_env
 
@@ -47,6 +48,11 @@
 logger = logging.getLogger(__file__)
 
 
+@deprecated(
+    since="0.3.24",
+    removal="1.0",
+    alternative_import="langchain_unstructured.UnstructuredPDFLoader",
+)
 class UnstructuredPDFLoader(UnstructuredFileLoader):
     """Load `PDF` files using `Unstructured`.
 
@@ -174,6 +180,11 @@ def source(self) -> str:
         return self.web_path if self.web_path is not None else self.file_path
 
 
+@deprecated(
+    since="0.3.24",
+    removal="1.0",
+    alternative_import="langchain_unstructured.UnstructuredPDFLoader",
+)
 class OnlinePDFLoader(BasePDFLoader):
     """Load online `PDF`."""
 
@@ -419,6 +430,12 @@ def lazy_load(
         yield from self.parser.parse(blob)
 
 
+@deprecated(
+    since="0.3.24",
+    removal="1.0",
+    alternative="langchain_community.document_loaders.generic.GenericLoader",
+    addendum="Pair it with PyPDFParser to parse PDF files.",
+)
 class PyPDFDirectoryLoader(BaseLoader):
     """Load and parse a directory of PDF files using 'pypdf' library.
 
@@ -1414,4 +1431,10 @@ def lazy_load(self) -> Iterator[Document]:
 
 
 # Legacy: only for backwards compatibility. Use PyPDFLoader instead
-PagedPDFSplitter = PyPDFLoader
+@deprecated(
+    since="0.3.24",
+    removal="1.0",
+    alternative="PyPDFLoader",
+)
+class PagedPDFSplitter(PyPDFLoader):
+    """Deprecated alias of `PyPDFLoader`, kept for backwards compatibility."""