Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions libs/community/langchain_community/document_loaders/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,16 +467,27 @@ def _search_content_by_cql(
if next_url:
response = self.confluence.get(next_url)
else:
url = "rest/api/content/search"
# Use the "rest/api/search" API, which respects includeArchivedSpaces, while "rest/api/content/search" does not
url = "rest/api/search"

params: Dict[str, Any] = {"cql": cql}
params.update(kwargs)
if include_archived_spaces is not None:
params["includeArchivedSpaces"] = include_archived_spaces

# expand values need to be prefixed with "content.", since "rest/api/search" acts one level higher than "rest/api/content/search"
if "expand" in params and params["expand"]:
params["expand"] = ",".join(
[f"content.{item.strip()}" for item in params["expand"].split(",")]
)
response = self.confluence.get(url, params=params)

return response.get("results", []), response.get("_links", {}).get("next", "")
results = response.get("results", [])
pages = []
for item in results:
# return the content field of each result object
pages.append(item["content"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

KeyError vulnerability: the code assumes every result item has a `content` field, without validating it. If the API returns a result without this field, the code will crash with a `KeyError`. Add a safety check, either `if "content" in item: pages.append(item["content"])`, or use `item.get("content")` with appropriate handling for missing content.

Suggested change
pages.append(item["content"])
if "content" in item:
pages.append(item["content"])

Spotted by Diamond

Fix in Graphite


Is this helpful? React 👍 or 👎 to let us know.

return pages, response.get("_links", {}).get("next", "")

def paginate_request(self, retrieval_method: Callable, **kwargs: Any) -> List:
"""Paginate the various methods to retrieve groups of pages.
Expand Down