Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update images type and docstring #1018

Merged
merged 3 commits into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions src/smolagents/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from collections import deque
from logging import getLogger
from pathlib import Path
from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Set, Tuple, TypedDict, Union

import jinja2
import yaml
Expand All @@ -36,6 +36,10 @@
from rich.rule import Rule
from rich.text import Text


if TYPE_CHECKING:
import PIL.Image

from .agent_types import AgentAudio, AgentImage, AgentType, handle_agent_output_types
from .default_tools import TOOL_MAPPING, FinalAnswerTool
from .local_python_executor import BASE_BUILTIN_MODULES, LocalPythonExecutor, PythonExecutor, fix_final_answer_code
Expand Down Expand Up @@ -264,7 +268,7 @@ def run(
task: str,
stream: bool = False,
reset: bool = True,
images: Optional[List[str]] = None,
images: Optional[List["PIL.Image.Image"]] = None,
additional_args: Optional[Dict] = None,
max_steps: Optional[int] = None,
):
Expand All @@ -275,7 +279,7 @@ def run(
task (`str`): Task to perform.
stream (`bool`): Whether to run in a streaming way.
reset (`bool`): Whether to reset the conversation or keep it going from previous run.
images (`list[str]`, *optional*): Paths to image(s).
images (`list[PIL.Image.Image]`, *optional*): Image object(s).
additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names!
max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. If not provided, the agent's default value is used.

Expand Down Expand Up @@ -319,7 +323,7 @@ def run(
return deque(self._run(task=self.task, max_steps=max_steps, images=images), maxlen=1)[0]

def _run(
self, task: str, max_steps: int, images: List[str] | None = None
self, task: str, max_steps: int, images: List["PIL.Image.Image"] | None = None
) -> Generator[ActionStep | AgentType, None, None]:
final_answer = None
self.step_number = 1
Expand All @@ -344,7 +348,7 @@ def _run(
yield memory_step
yield handle_agent_output_types(final_answer)

def _create_memory_step(self, step_start_time: float, images: List[str] | None) -> ActionStep:
def _create_memory_step(self, step_start_time: float, images: List["PIL.Image.Image"] | None) -> ActionStep:
return ActionStep(step_number=self.step_number, start_time=step_start_time, observations_images=images)

def _execute_step(self, task: str, memory_step: ActionStep) -> Union[None, Any]:
Expand Down Expand Up @@ -373,7 +377,7 @@ def _finalize_step(self, memory_step: ActionStep, step_start_time: float):
memory_step, agent=self
)

def _handle_max_steps_reached(self, task: str, images: List[str], step_start_time: float) -> Any:
def _handle_max_steps_reached(self, task: str, images: List["PIL.Image.Image"], step_start_time: float) -> Any:
final_answer = self.provide_final_answer(task, images)
final_memory_step = ActionStep(
step_number=self.step_number, error=AgentMaxStepsError("Reached max steps.", self.logger)
Expand Down Expand Up @@ -557,13 +561,13 @@ def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]
)
return rationale.strip(), action.strip()

def provide_final_answer(self, task: str, images: Optional[list[str]]) -> str:
def provide_final_answer(self, task: str, images: Optional[list["PIL.Image.Image"]]) -> str:
"""
Provide the final answer to the task, based on the logs of the agent's interactions.

Args:
task (`str`): Task to perform.
images (`list[str]`, *optional*): Paths to image(s).
images (`list[PIL.Image.Image]`, *optional*): Image object(s).

Returns:
`str`: Final answer to the task.
Expand Down
6 changes: 4 additions & 2 deletions src/smolagents/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@


if TYPE_CHECKING:
import PIL.Image

from smolagents.models import ChatMessage
from smolagents.monitoring import AgentLogger

Expand Down Expand Up @@ -58,7 +60,7 @@ class ActionStep(MemoryStep):
model_output_message: ChatMessage = None
model_output: str | None = None
observations: str | None = None
observations_images: List[str] | None = None
observations_images: List["PIL.Image.Image"] | None = None
action_output: Any = None

def dict(self):
Expand Down Expand Up @@ -169,7 +171,7 @@ def to_messages(self, summary_mode: bool, **kwargs) -> List[Message]:
@dataclass
class TaskStep(MemoryStep):
task: str
task_images: List[str] | None = None
task_images: List["PIL.Image.Image"] | None = None

def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]:
content = [{"type": "text", "text": f"New task:\n{self.task}"}]
Expand Down