diff --git a/CHANGELOG.md b/CHANGELOG.md index 13580d76..13c9e588 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,30 @@ -## [1.39.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.1...v1.39.0) (2025-02-17) +## [1.40.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0...v1.40.0-beta.1) (2025-02-25) ### Features -* add the new handling exception ([5c0bc46](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c0bc46c6322ea07efa31d95819d7da47462f981)) +* add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) +* update parse node ([8cf9685](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/8cf96857a000eada6d1c9ce1a357ee3d1f2bd003)) ### CI -* **release:** 1.39.0-beta.1 [skip ci] ([9be7dcd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9be7dcd8d1f5b64e6a6c29c931f0195e04bb4f23)) +* **release:** 1.39.0-beta.2 [skip ci] ([ac2fcd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ac2fcd66ce2603153877e3141b3ff862a348e335)) + +## [1.39.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0-beta.1...v1.39.0-beta.2) (2025-02-25) + + + +### Features + +* add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) + + + +### CI + +* **release:** 1.38.1 [skip ci] ([5c3d62d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c3d62d55b5c6dcbb304b5879a19ca09bc18b153)) + ## [1.39.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.1-beta.1...v1.39.0-beta.1) (2025-02-17) diff --git a/pyproject.toml b/pyproject.toml index b582716a..7617f99e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,8 @@ [project] name = "scrapegraphai" -version = "1.39.0" +version = "1.40.0b1" + description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index f7f20cf8..d50b72db 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -87,6 +87,37 @@ def invoke_with_timeout(self, chain, inputs, timeout): self.logger.error(f"Error during chain execution: {str(e)}") raise + def process(self, state: dict) -> dict: + """Process the input state and generate an answer.""" + user_prompt = state.get("user_prompt") + # Check for content in different possible state keys + content = ( + state.get("relevant_chunks") + or state.get("parsed_doc") + or state.get("doc") + or state.get("content") + ) + + if not content: + raise ValueError("No content found in state to generate answer from") + + if not user_prompt: + raise ValueError("No user prompt found in state") + + # Create the chain input with both content and question keys + chain_input = { + "content": content, + "question": user_prompt + } + + try: + response = self.invoke_with_timeout(self.chain, chain_input, self.timeout) + state.update({self.output[0]: response}) + return state + except Exception as e: + self.logger.error(f"Error in GenerateAnswerNode: {str(e)}") + raise + def execute(self, state: dict) -> dict: """ Executes the GenerateAnswerNode. 
diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py
index fbc9ba31..cb61a643 100644
--- a/scrapegraphai/nodes/parse_node.py
+++ b/scrapegraphai/nodes/parse_node.py
@@ -78,7 +78,6 @@ def execute(self, state: dict) -> dict:
         self.logger.info(f"--- Executing {self.node_name} Node ---")
 
         input_keys = self.get_input_keys(state)
-
         input_data = [state[key] for key in input_keys]
         docs_transformed = input_data[0]
         source = input_data[1] if self.parse_urls else None
@@ -121,6 +120,9 @@ def execute(self, state: dict) -> dict:
         )
 
         state.update({self.output[0]: chunks})
+        state.update({"parsed_doc": chunks})
+        state.update({"content": chunks})
+
         if self.parse_urls:
             state.update({self.output[1]: link_urls})
             state.update({self.output[2]: img_urls})
diff --git a/uv.lock b/uv.lock
index 000a667c..415aade0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3446,7 +3446,7 @@ wheels = [
 
 [[package]]
 name = "scrapegraphai"
-version = "1.36.0"
+version = "1.40.0b1"
 source = { editable = "." }
 dependencies = [
     { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },