diff --git a/python/semantic_kernel/processes/dapr_runtime/actors/step_actor.py b/python/semantic_kernel/processes/dapr_runtime/actors/step_actor.py index cc2e3c0c23cf..826ddd1f3dea 100644 --- a/python/semantic_kernel/processes/dapr_runtime/actors/step_actor.py +++ b/python/semantic_kernel/processes/dapr_runtime/actors/step_actor.py @@ -38,6 +38,7 @@ from semantic_kernel.processes.process_message_factory import ProcessMessageFactory from semantic_kernel.processes.process_types import get_generic_state_type from semantic_kernel.processes.step_utils import ( + DEFAULT_ALLOWED_MODULE_PREFIXES, find_input_channels, get_fully_qualified_name, get_step_class_from_qualified_name, @@ -57,7 +58,7 @@ def __init__( actor_id: ActorId, kernel: Kernel, factories: dict[str, Callable], - allowed_module_prefixes: Sequence[str] | None = None, + allowed_module_prefixes: Sequence[str] | None = DEFAULT_ALLOWED_MODULE_PREFIXES, ): """Initializes a new instance of StepActor. @@ -66,9 +67,10 @@ def __init__( actor_id: The unique ID for the actor. kernel: The Kernel dependency to be injected. factories: The factory dictionary to use for creating the step. - allowed_module_prefixes: Optional sequence of module prefixes that are allowed - for step class loading. If provided, step classes must come from modules - starting with one of these prefixes. + allowed_module_prefixes: Sequence of module prefixes that are allowed + for step class loading. Step classes must come from modules starting + with one of these prefixes. Defaults to ("semantic_kernel.",). Pass + None to allow any module (not recommended for production). """ super().__init__(ctx, actor_id) self.kernel = kernel diff --git a/python/semantic_kernel/processes/dapr_runtime/dapr_kernel_process_context.py b/python/semantic_kernel/processes/dapr_runtime/dapr_kernel_process_context.py index 8ecf6e0f4418..a8b307dd378d 100644 --- a/python/semantic_kernel/processes/dapr_runtime/dapr_kernel_process_context.py +++ b/python/semantic_kernel/processes/dapr_runtime/dapr_kernel_process_context.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import uuid +from collections.abc import Sequence from dapr.actor import ActorId, ActorProxy @@ -9,6 +10,7 @@ from semantic_kernel.processes.dapr_runtime.interfaces.process_interface import ProcessInterface from semantic_kernel.processes.kernel_process.kernel_process import KernelProcess from semantic_kernel.processes.kernel_process.kernel_process_event import KernelProcessEvent +from semantic_kernel.processes.step_utils import DEFAULT_ALLOWED_MODULE_PREFIXES from semantic_kernel.utils.feature_stage_decorator import experimental @@ -20,13 +22,21 @@ class DaprKernelProcessContext: process: KernelProcess max_supersteps: int = 100 - def __init__(self, process: KernelProcess, max_supersteps: int | None = None) -> None: + def __init__( + self, + process: KernelProcess, + max_supersteps: int | None = None, + allowed_module_prefixes: Sequence[str] | None = DEFAULT_ALLOWED_MODULE_PREFIXES, + ) -> None: """Initialize a new instance of DaprKernelProcessContext. Args: process: The kernel process to start. max_supersteps: The maximum number of supersteps. This is the total number of times process steps will run. Defaults to None, and thus the process will run its steps 100 times. + allowed_module_prefixes: Sequence of module prefixes that are allowed + for step class loading. Defaults to ("semantic_kernel.",). Pass + None to allow any module (not recommended for production). """ if process.state.name is None: raise ValueError("Process state name must not be None") @@ -36,6 +46,7 @@ def __init__(self, process: KernelProcess, max_supersteps: int | None = None) -> if max_supersteps is not None: self.max_supersteps = max_supersteps + self.allowed_module_prefixes = allowed_module_prefixes self.process = process process_id = ActorId(process.state.id) self.dapr_process = ActorProxy.create( # type: ignore @@ -76,4 +87,6 @@ async def get_state(self) -> KernelProcess: """ raw_process_info = await self.dapr_process.get_process_info() dapr_process_info = DaprProcessInfo.model_validate(raw_process_info) - return dapr_process_info.to_kernel_process() + return dapr_process_info.to_kernel_process( + allowed_module_prefixes=self.allowed_module_prefixes, + ) diff --git a/python/semantic_kernel/processes/dapr_runtime/dapr_process_info.py b/python/semantic_kernel/processes/dapr_runtime/dapr_process_info.py index 3df00e2d0cd1..51263bc0139b 100644 --- a/python/semantic_kernel/processes/dapr_runtime/dapr_process_info.py +++ b/python/semantic_kernel/processes/dapr_runtime/dapr_process_info.py @@ -10,6 +10,7 @@ from semantic_kernel.processes.kernel_process.kernel_process import KernelProcess from semantic_kernel.processes.kernel_process.kernel_process_state import KernelProcessState from semantic_kernel.processes.kernel_process.kernel_process_step_info import KernelProcessStepInfo +from semantic_kernel.processes.step_utils import DEFAULT_ALLOWED_MODULE_PREFIXES from semantic_kernel.utils.feature_stage_decorator import experimental @@ -20,13 +21,17 @@ class DaprProcessInfo(DaprStepInfo): type: Literal["DaprProcessInfo"] = "DaprProcessInfo" # type: ignore steps: MutableSequence["DaprStepInfo | DaprProcessInfo"] = Field(default_factory=list) - def to_kernel_process(self, allowed_module_prefixes: Sequence[str] | None = None) -> KernelProcess: + def to_kernel_process( + self, allowed_module_prefixes: Sequence[str] | None = DEFAULT_ALLOWED_MODULE_PREFIXES + ) -> KernelProcess: """Converts the Dapr process info to a kernel process. Args: - allowed_module_prefixes: Optional list of module prefixes that are allowed - for step class loading. If provided, step classes must come from modules - starting with one of these prefixes. + allowed_module_prefixes: Sequence of module prefixes that are allowed + for step class loading. Defaults to DEFAULT_ALLOWED_MODULE_PREFIXES + ("semantic_kernel.",). Pass None to disable the allowlist and allow + any module (not recommended for production). An empty sequence blocks + all modules. """ if not isinstance(self.state, KernelProcessState): raise ValueError("State must be a kernel process state") diff --git a/python/semantic_kernel/processes/dapr_runtime/dapr_step_info.py b/python/semantic_kernel/processes/dapr_runtime/dapr_step_info.py index d34073604939..c21ed34d9de3 100644 --- a/python/semantic_kernel/processes/dapr_runtime/dapr_step_info.py +++ b/python/semantic_kernel/processes/dapr_runtime/dapr_step_info.py @@ -11,7 +11,11 @@ from semantic_kernel.processes.kernel_process.kernel_process_state import KernelProcessState from semantic_kernel.processes.kernel_process.kernel_process_step_info import KernelProcessStepInfo from semantic_kernel.processes.kernel_process.kernel_process_step_state import KernelProcessStepState -from semantic_kernel.processes.step_utils import get_fully_qualified_name, get_step_class_from_qualified_name +from semantic_kernel.processes.step_utils import ( + DEFAULT_ALLOWED_MODULE_PREFIXES, + get_fully_qualified_name, + get_step_class_from_qualified_name, +) from semantic_kernel.utils.feature_stage_decorator import experimental @@ -25,14 +29,16 @@ class DaprStepInfo(KernelBaseModel): edges: dict[str, list[KernelProcessEdge]] = Field(default_factory=dict) def to_kernel_process_step_info( - self, allowed_module_prefixes: Sequence[str] | None = None + self, allowed_module_prefixes: Sequence[str] | None = DEFAULT_ALLOWED_MODULE_PREFIXES ) -> KernelProcessStepInfo: """Converts the Dapr step info to a kernel process step info. Args: - allowed_module_prefixes: Optional list of module prefixes that are allowed - for step class loading. If provided, step classes must come from modules - starting with one of these prefixes. + allowed_module_prefixes: Sequence of module prefixes that are allowed + for step class loading. Defaults to DEFAULT_ALLOWED_MODULE_PREFIXES + ("semantic_kernel.",). Pass None to disable the allowlist and allow + any module (not recommended for production). An empty sequence blocks + all modules. """ inner_step_type = get_step_class_from_qualified_name( self.inner_step_python_type, diff --git a/python/semantic_kernel/processes/step_utils.py b/python/semantic_kernel/processes/step_utils.py index 66c72008ae75..3b435889f124 100644 --- a/python/semantic_kernel/processes/step_utils.py +++ b/python/semantic_kernel/processes/step_utils.py @@ -12,6 +12,8 @@ from semantic_kernel.processes.kernel_process.kernel_process_step_context import KernelProcessStepContext from semantic_kernel.utils.feature_stage_decorator import experimental +DEFAULT_ALLOWED_MODULE_PREFIXES: tuple[str, ...] = ("semantic_kernel.",) + @experimental def find_input_channels( @@ -47,7 +49,7 @@ def get_fully_qualified_name(cls) -> str: @experimental def get_step_class_from_qualified_name( full_class_name: str, - allowed_module_prefixes: Sequence[str] | None = None, + allowed_module_prefixes: Sequence[str] | None = DEFAULT_ALLOWED_MODULE_PREFIXES, ) -> type[KernelProcessStep]: """Loads and validates a KernelProcessStep class from a fully qualified name. @@ -58,11 +60,12 @@ def get_step_class_from_qualified_name( full_class_name: The fully qualified class name in Python import notation (e.g., 'mypackage.mymodule.MyStep'). The module must be importable from the current Python environment. - allowed_module_prefixes: Optional list of module prefixes that are allowed - to be imported. If provided, the module must start with one of these - prefixes. This check is performed BEFORE import to prevent execution - of module-level code in unauthorized modules. If None or empty, any - module is allowed. + allowed_module_prefixes: Sequence of module prefixes that are allowed + to be imported. The module must start with one of these prefixes. + This check is performed BEFORE import to prevent execution of + module-level code in unauthorized modules. Defaults to + ("semantic_kernel.",). Pass None to allow any module (not + recommended for production). An empty sequence blocks all modules. Returns: The validated class type that is a subclass of KernelProcessStep @@ -90,7 +93,12 @@ def get_step_class_from_qualified_name( ) # Check module allowlist BEFORE import to prevent module-level code execution - if allowed_module_prefixes and not any(module_name.startswith(prefix) for prefix in allowed_module_prefixes): + if allowed_module_prefixes is not None and not any( + module_name.startswith(prefix) + if prefix.endswith(".") + else (module_name == prefix or module_name.startswith(prefix + ".")) + for prefix in allowed_module_prefixes + ): raise ProcessInvalidConfigurationException( f"Module '{module_name}' is not in the allowed module prefixes: {allowed_module_prefixes}. " f"Step class '{full_class_name}' cannot be loaded." diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 688021f74f6a..bbb0fc7601e5 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -7,7 +7,6 @@ from unittest.mock import MagicMock from uuid import uuid4 -import pandas as pd from pydantic import BaseModel from pytest import fixture @@ -355,6 +354,8 @@ def definition( @fixture def definition_pandas(index_kind: str, distance_function: str, vector_property_type: str, dimensions: int) -> object: + import pandas as pd + return VectorStoreCollectionDefinition( fields=[ VectorStoreField( diff --git a/python/tests/unit/processes/dapr_runtime/test_dapr_kernel_process_context.py b/python/tests/unit/processes/dapr_runtime/test_dapr_kernel_process_context.py index c344f117a817..5d17ededec1f 100644 --- a/python/tests/unit/processes/dapr_runtime/test_dapr_kernel_process_context.py +++ b/python/tests/unit/processes/dapr_runtime/test_dapr_kernel_process_context.py @@ -41,7 +41,10 @@ def process_context(): mock_dapr_process = AsyncMock(spec=ProcessInterface) mock_actor_proxy_create.return_value = mock_dapr_process - context = DaprKernelProcessContext(process=process) + context = DaprKernelProcessContext( + process=process, + allowed_module_prefixes=("semantic_kernel.", DummyInnerStepType.__module__), + ) yield context, mock_dapr_process diff --git a/python/tests/unit/processes/dapr_runtime/test_step_class_loading.py b/python/tests/unit/processes/dapr_runtime/test_step_class_loading.py index 4fd43f957b79..8e79e9739396 100644 --- a/python/tests/unit/processes/dapr_runtime/test_step_class_loading.py +++ b/python/tests/unit/processes/dapr_runtime/test_step_class_loading.py @@ -22,7 +22,10 @@ class NotAStep: def test_valid_step_class_loads_successfully(): """Test that a valid KernelProcessStep subclass loads correctly.""" full_name = f"{MockValidStep.__module__}.{MockValidStep.__name__}" - result = get_step_class_from_qualified_name(full_name) + result = get_step_class_from_qualified_name( + full_name, + allowed_module_prefixes=[MockValidStep.__module__], + ) assert result is MockValidStep assert issubclass(result, KernelProcessStep) @@ -60,7 +63,7 @@ def test_none_like_empty_raises_exception(): def test_nonexistent_module_raises_exception(): """Test that a non-existent module raises ProcessInvalidConfigurationException.""" with pytest.raises(ProcessInvalidConfigurationException, match="Unable to import module"): - get_step_class_from_qualified_name("nonexistent_module_xyz123.SomeClass") + get_step_class_from_qualified_name("nonexistent_module_xyz123.SomeClass", allowed_module_prefixes=None) def test_nonexistent_class_in_valid_module_raises_exception(): @@ -79,25 +82,25 @@ def test_non_step_class_raises_exception(): """Test that a class not inheriting from KernelProcessStep raises exception.""" full_name = f"{NotAStep.__module__}.{NotAStep.__name__}" with pytest.raises(ProcessInvalidConfigurationException, match="must be a subclass of KernelProcessStep"): - get_step_class_from_qualified_name(full_name) + get_step_class_from_qualified_name(full_name, allowed_module_prefixes=[NotAStep.__module__]) def test_builtin_class_raises_exception(): - """Test that built-in classes like str raise exception.""" + """Test that built-in classes like str raise exception (bypassing prefix check to test subclass validation).""" with pytest.raises(ProcessInvalidConfigurationException, match="must be a subclass of KernelProcessStep"): - get_step_class_from_qualified_name("builtins.str") + get_step_class_from_qualified_name("builtins.str", allowed_module_prefixes=None) def test_os_system_prevented(): - """Test that os.system (a dangerous function, not a class) is prevented.""" + """Test that os.system is prevented (bypassing prefix check to test type validation).""" with pytest.raises(ProcessInvalidConfigurationException, match="is not a class type"): - get_step_class_from_qualified_name("os.system") + get_step_class_from_qualified_name("os.system", allowed_module_prefixes=None) def test_arbitrary_class_prevented(): - """Test that arbitrary classes like subprocess.Popen are prevented.""" + """Test that arbitrary classes like subprocess.Popen are prevented (bypassing prefix check).""" with pytest.raises(ProcessInvalidConfigurationException, match="must be a subclass of KernelProcessStep"): - get_step_class_from_qualified_name("subprocess.Popen") + get_step_class_from_qualified_name("subprocess.Popen", allowed_module_prefixes=None) def test_kernel_class_prevented(): @@ -139,31 +142,51 @@ def test_allowlist_blocks_dangerous_module(): ) -def test_empty_allowlist_allows_all(): - """Test that an empty allowlist allows any module (current behavior).""" +def test_empty_allowlist_blocks_all(): + """Test that an empty allowlist blocks all modules.""" full_name = f"{MockValidStep.__module__}.{MockValidStep.__name__}" - result = get_step_class_from_qualified_name(full_name, allowed_module_prefixes=[]) - assert result is MockValidStep + with pytest.raises(ProcessInvalidConfigurationException, match="is not in the allowed module prefixes"): + get_step_class_from_qualified_name(full_name, allowed_module_prefixes=[]) def test_none_allowlist_allows_all(): - """Test that None allowlist allows any module (default behavior).""" + """Test that None allowlist allows any module (explicit opt-out).""" full_name = f"{MockValidStep.__module__}.{MockValidStep.__name__}" result = get_step_class_from_qualified_name(full_name, allowed_module_prefixes=None) assert result is MockValidStep +def test_default_allowlist_blocks_non_sk_modules(): + """Test that the default allowlist only permits semantic_kernel modules.""" + with pytest.raises(ProcessInvalidConfigurationException, match="is not in the allowed module prefixes"): + get_step_class_from_qualified_name("subprocess.Popen") + + +def test_default_allowlist_permits_sk_modules(): + """Test that the default allowlist permits semantic_kernel modules.""" + full_name = "semantic_kernel.processes.kernel_process.kernel_process_step.KernelProcessStep" + result = get_step_class_from_qualified_name(full_name) + assert result is KernelProcessStep + + def test_allowlist_prefix_matching(): - """Test that allowlist uses prefix matching correctly.""" + """Test that allowlist uses boundary-aware prefix matching correctly.""" full_name = f"{MockValidStep.__module__}.{MockValidStep.__name__}" - # Use a prefix of the actual module name - module_prefix = MockValidStep.__module__[:4] # First 4 chars as prefix + # Exact module name match works result = get_step_class_from_qualified_name( full_name, - allowed_module_prefixes=[module_prefix], + allowed_module_prefixes=[MockValidStep.__module__], ) assert result is MockValidStep + # Arbitrary substring prefix without dot boundary does not match + short_prefix = MockValidStep.__module__[:4] # e.g. "test" + with pytest.raises(ProcessInvalidConfigurationException, match="is not in the allowed module prefixes"): + get_step_class_from_qualified_name( + full_name, + allowed_module_prefixes=[short_prefix], + ) + def test_allowlist_multiple_prefixes(): """Test that multiple allowed prefixes work correctly.""" diff --git a/python/tests/unit/processes/test_step_utils.py b/python/tests/unit/processes/test_step_utils.py index 1a773fa5bc08..1209b5b5a19e 100644 --- a/python/tests/unit/processes/test_step_utils.py +++ b/python/tests/unit/processes/test_step_utils.py @@ -6,6 +6,7 @@ import pytest +from semantic_kernel.exceptions.process_exceptions import ProcessInvalidConfigurationException from semantic_kernel.functions.kernel_function import KernelFunction from semantic_kernel.functions.kernel_function_decorator import kernel_function from semantic_kernel.kernel import Kernel @@ -15,7 +16,11 @@ from semantic_kernel.processes.kernel_process.kernel_process_step_context import ( KernelProcessStepContext, ) -from semantic_kernel.processes.step_utils import find_input_channels, get_fully_qualified_name +from semantic_kernel.processes.step_utils import ( + find_input_channels, + get_fully_qualified_name, + get_step_class_from_qualified_name, +) @pytest.fixture @@ -131,3 +136,69 @@ class InnerClass: result = get_fully_qualified_name(OuterClass.InnerClass) expected = f"{OuterClass.__module__}.InnerClass" assert result == expected + + +# --- Tests for get_step_class_from_qualified_name --- + + +def test_get_step_class_empty_allowlist_blocks_all(): + """An empty allowlist sequence should block all modules.""" + with pytest.raises(ProcessInvalidConfigurationException, match="not in the allowed module prefixes"): + get_step_class_from_qualified_name( + "semantic_kernel.processes.kernel_process.kernel_process_step.KernelProcessStep", + allowed_module_prefixes=[], + ) + + +def test_get_step_class_none_allowlist_allows_any(): + """Passing None should disable the allowlist check entirely.""" + from semantic_kernel.processes.kernel_process.kernel_process_step import KernelProcessStep + + cls = get_step_class_from_qualified_name( + "semantic_kernel.processes.kernel_process.kernel_process_step.KernelProcessStep", + allowed_module_prefixes=None, + ) + assert cls is KernelProcessStep + + +def test_get_step_class_prefix_without_dot_exact_match(): + """A prefix without a trailing dot should match the exact module name.""" + from semantic_kernel.processes.kernel_process.kernel_process_step import KernelProcessStep + + cls = get_step_class_from_qualified_name( + "semantic_kernel.processes.kernel_process.kernel_process_step.KernelProcessStep", + allowed_module_prefixes=["semantic_kernel.processes.kernel_process.kernel_process_step"], + ) + assert cls is KernelProcessStep + + +def test_get_step_class_prefix_without_dot_segment_boundary(): + """A prefix without a trailing dot must not match partial segment names.""" + with pytest.raises(ProcessInvalidConfigurationException, match="not in the allowed module prefixes"): + get_step_class_from_qualified_name( + "semantic_kernel_evil.some_module.SomeClass", + allowed_module_prefixes=["semantic_kernel"], + ) + + +def test_get_step_class_prefix_with_dot_matches_submodule(): + """A prefix with a trailing dot should match submodules.""" + from semantic_kernel.processes.kernel_process.kernel_process_step import KernelProcessStep + + cls = get_step_class_from_qualified_name( + "semantic_kernel.processes.kernel_process.kernel_process_step.KernelProcessStep", + allowed_module_prefixes=["semantic_kernel."], + ) + assert cls is KernelProcessStep + + +def test_get_step_class_default_allowlist_blocks_non_sk_module(): + """Default allowlist should block modules outside semantic_kernel.""" + with pytest.raises(ProcessInvalidConfigurationException, match="not in the allowed module prefixes"): + get_step_class_from_qualified_name("os.path.SomeClass") + + +def test_get_step_class_invalid_format(): + """Invalid class name format should raise an exception.""" + with pytest.raises(ProcessInvalidConfigurationException, match="Invalid step class name format"): + get_step_class_from_qualified_name("NoModule")