Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion libs/community/langchain_community/document_loaders/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from urllib.parse import urlparse

import requests
from langchain_core._api import deprecated
from langchain_core.documents import Document
from langchain_core.utils import get_from_dict_or_env

Expand All @@ -47,6 +48,11 @@
logger = logging.getLogger(__file__)


@deprecated(
since="0.3.24",
removal="1.0",
alternative_import="langchain_unstructured.UnstructuredPDFLoader",
)
class UnstructuredPDFLoader(UnstructuredFileLoader):
"""Load `PDF` files using `Unstructured`.

Expand Down Expand Up @@ -174,6 +180,11 @@ def source(self) -> str:
return self.web_path if self.web_path is not None else self.file_path


@deprecated(
since="0.3.24",
removal="1.0",
alternative_import="langchain_unstructured.UnstructuredPDFLoader",
)
class OnlinePDFLoader(BasePDFLoader):
"""Load online `PDF`."""

Expand Down Expand Up @@ -419,6 +430,12 @@ def lazy_load(
yield from self.parser.parse(blob)


@deprecated(
since="0.3.24",
removal="1.0",
alternative="langchain_community.document_loaders.generic.GenericLoader",
message="Use GenericLoader and PyPDFParser instead."
)
class PyPDFDirectoryLoader(BaseLoader):
"""Load and parse a directory of PDF files using 'pypdf' library.

Expand Down Expand Up @@ -1414,4 +1431,10 @@ def lazy_load(self) -> Iterator[Document]:


# Legacy: only for backwards compatibility. Use PyPDFLoader instead
PagedPDFSplitter = PyPDFLoader
@deprecated(
    since="0.3.24",
    removal="1.0",
    alternative="PyPDFLoader",
)
class PagedPDFSplitter(PyPDFLoader):
    """Deprecated legacy name for `PyPDFLoader`.

    This subclass adds no attributes or methods of its own; it exists so that
    the old name keeps working while carrying a deprecation notice. Use
    `PyPDFLoader` instead.
    """