diff --git a/crates/basilica-sdk-python/CHANGELOG.md b/crates/basilica-sdk-python/CHANGELOG.md index 90a806e3..ce1e56f7 100644 --- a/crates/basilica-sdk-python/CHANGELOG.md +++ b/crates/basilica-sdk-python/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.29.3] - 2026-05-17 + +### Fixed +- `@basilica.distributed` / `@basilica.deployment` / `SourcePackager.from_function` + now filter the captured module-level imports down to those whose + bound names are actually referenced by the function body. Without + this filter, the v0.29.2 fix shipped every module-level import to + the worker pod — including `import basilica` and + `from basilica import ...` that are only used by the decorator + itself — which caused the worker to fail with + `ModuleNotFoundError: No module named 'basilica'` at runtime + (`basilica-sdk` is not installed in the trainer image). The filter + uses AST walking of the function body to collect referenced `Name` + / leftmost `Attribute` identifiers and emits only the matching + imports. Refs #477 follow-up. Cross-repo reference: + `one-covenant/basilica-backend#419` Stage 4 take-3 Cell B runtime + trace. + ## [0.29.2] - 2026-05-16 ### Fixed diff --git a/crates/basilica-sdk-python/Cargo.toml b/crates/basilica-sdk-python/Cargo.toml index d95a6f84..7592f8dd 100644 --- a/crates/basilica-sdk-python/Cargo.toml +++ b/crates/basilica-sdk-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "basilica-sdk-python" -version = "0.29.2" +version = "0.29.3" edition = "2021" authors = ["Basilica Team"] description = "Python bindings for the Basilica SDK" diff --git a/crates/basilica-sdk-python/pyproject.toml b/crates/basilica-sdk-python/pyproject.toml index df5fd700..14ce2ec3 100644 --- a/crates/basilica-sdk-python/pyproject.toml +++ b/crates/basilica-sdk-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "basilica-sdk" -version = "0.29.2" +version = "0.29.3" description = "Python SDK for deploying containerized applications on the Basilica GPU cloud" readme = "README.md" license = { text = "MIT OR Apache-2.0" } diff --git a/crates/basilica-sdk-python/python/basilica/decorators.py b/crates/basilica-sdk-python/python/basilica/decorators.py index aaa64b5c..7371f707 100644 --- a/crates/basilica-sdk-python/python/basilica/decorators.py +++ b/crates/basilica-sdk-python/python/basilica/decorators.py @@ -19,20 +19,48 @@ from basilica._basilica import HealthCheckConfig -def _extract_module_level_imports(func: Callable) -> str: +def _collect_referenced_names(func_source: str) -> set: """ - Extract module-level `import` and `from ... import ...` statements - from the module that defines `func`. + Walk the AST of `func_source` and collect every free `Name` and the + leftmost identifier of every `Attribute` chain (e.g. `os.environ.get` + contributes `os`). Local assignments and inside-body imports are NOT + excluded — that's fine because the import-prepending logic just + short-circuits on names the body genuinely never mentions. + """ + names: set = set() + try: + tree = ast.parse(func_source) + except SyntaxError: + return names + for node in ast.walk(tree): + if isinstance(node, ast.Name): + names.add(node.id) + elif isinstance(node, ast.Attribute): + # Find leftmost Name in the Attribute chain (e.g. `os` in `os.environ.get`). + cursor: ast.AST = node + while isinstance(cursor, ast.Attribute): + cursor = cursor.value + if isinstance(cursor, ast.Name): + names.add(cursor.id) + return names + + +def _extract_module_level_imports(func: Callable, func_source: str) -> str: + """ + Extract the subset of module-level `import` / `from ... import ...` + statements from the module that defines `func` whose bound names + are referenced inside `func_source`. The decorator-based deploy path ships only the function source via `inspect.getsource(func)`; module-level imports the body references - would otherwise be missing on the worker. Walking the defining - module's AST keeps just the import statements (not arbitrary - top-level state), so the shipped source can be exec'd in a clean - namespace without `NameError`. + would otherwise be missing on the worker. Filtering by actual usage + keeps unrelated module-level imports (e.g. `import basilica` used + only by the decorator) OUT of the worker source, so the worker pod + does not have to install packages it never references. Returns an empty string if the module source is unavailable - (interactive sessions, exec/eval contexts, frozen modules). + (interactive sessions, exec/eval contexts, frozen modules) or if no + module-level import binding is referenced by the function body. Refs: one-covenant/basilica#477, one-covenant/basilica-backend#419 (Stage 4 take-3 Cell B). @@ -48,23 +76,61 @@ def _extract_module_level_imports(func: Callable) -> str: tree = ast.parse(module_source) except SyntaxError: return "" + + referenced = _collect_referenced_names(func_source) + if not referenced: + return "" + lines: List[str] = [] for node in tree.body: - if isinstance(node, (ast.Import, ast.ImportFrom)): - try: - lines.append(ast.unparse(node)) - except AttributeError: - # Python < 3.9 has no `ast.unparse`; SDK requires - # >= 3.10 per pyproject.toml so this branch is unreachable - # at runtime, but keep it defensive. - lines.append( - inspect.getsource(module).splitlines()[node.lineno - 1] + if isinstance(node, ast.Import): + kept = [ + alias for alias in node.names + if _import_binds_referenced_name(alias, referenced) + ] + if kept: + filtered = ast.Import(names=kept) + lines.append(ast.unparse(filtered)) + elif isinstance(node, ast.ImportFrom): + kept = [ + alias for alias in node.names + if _from_import_binds_referenced_name(alias, referenced) + ] + if kept: + filtered = ast.ImportFrom( + module=node.module, names=kept, level=node.level ) + lines.append(ast.unparse(filtered)) if not lines: return "" return "\n".join(lines) + "\n" +def _import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool: + """ + `import foo` binds `foo`. `import foo as bar` binds `bar`. + `import foo.bar` binds the top-level name `foo` (Python module + resolution). + """ + if alias.asname is not None: + return alias.asname in referenced + # `import foo.bar` → binds `foo`. + return alias.name.split(".")[0] in referenced + + +def _from_import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool: + """ + `from x import foo` binds `foo`. `from x import foo as bar` binds `bar`. + `from x import *` is treated as referenced (we cannot know what names + it exposes without executing the import). + """ + if alias.name == "*": + return True + if alias.asname is not None: + return alias.asname in referenced + return alias.name in referenced + + class DeployedFunction: """ Wrapper around a decorated function with deployment capabilities. @@ -160,10 +226,12 @@ def _extract_source(self) -> str: func_source = '\n'.join(func_lines) func_source = textwrap.dedent(func_source) - # Prepend the defining module's top-level imports so names like - # `os`, `time`, `Optional` resolve when the body runs on the - # worker pod. Refs one-covenant/basilica#477. - imports = _extract_module_level_imports(self._func) + # Prepend the defining module's top-level imports whose bound + # names appear in the function body, so references like `os`, + # `time`, `Optional` resolve on the worker pod without dragging + # in unrelated imports (e.g. `import basilica` used only by the + # decorator). Refs one-covenant/basilica#477. + imports = _extract_module_level_imports(self._func, func_source) # Generate entry point that calls the function func_name = self._func.__name__ @@ -393,7 +461,7 @@ def _extract_source(self) -> str: func_lines = lines[def_idx:] func_source = textwrap.dedent("\n".join(func_lines)) - imports = _extract_module_level_imports(self._func) + imports = _extract_module_level_imports(self._func, func_source) func_name = self._func.__name__ return f"""{imports}{func_source} diff --git a/crates/basilica-sdk-python/python/basilica/source.py b/crates/basilica-sdk-python/python/basilica/source.py index 77ef9126..0b07f8f2 100644 --- a/crates/basilica-sdk-python/python/basilica/source.py +++ b/crates/basilica-sdk-python/python/basilica/source.py @@ -33,17 +33,55 @@ from .exceptions import SourceError -def _extract_module_level_imports(func: Callable) -> str: +def _collect_referenced_names(func_source: str) -> set: + """Walk the AST of `func_source` and collect free `Name` references and + the leftmost identifier of every `Attribute` chain (`os` in + `os.environ.get`).""" + names: set = set() + try: + tree = ast.parse(func_source) + except SyntaxError: + return names + for node in ast.walk(tree): + if isinstance(node, ast.Name): + names.add(node.id) + elif isinstance(node, ast.Attribute): + cursor: ast.AST = node + while isinstance(cursor, ast.Attribute): + cursor = cursor.value + if isinstance(cursor, ast.Name): + names.add(cursor.id) + return names + + +def _import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool: + if alias.asname is not None: + return alias.asname in referenced + return alias.name.split(".")[0] in referenced + + +def _from_import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool: + if alias.name == "*": + return True + if alias.asname is not None: + return alias.asname in referenced + return alias.name in referenced + + +def _extract_module_level_imports(func: Callable, func_source: str) -> str: """ - Extract module-level `import` / `from ... import ...` statements - from the module that defines `func`. + Extract the subset of module-level `import` / `from ... import ...` + statements from the module that defines `func` whose bound names + are referenced inside `func_source`. - Walking the defining module's AST keeps just the import statements - (not arbitrary top-level state), so the shipped source can be exec'd - in a clean namespace without `NameError` on the worker pod. + Filtering by actual usage keeps unrelated module-level imports + (e.g. `import basilica` used only by the decorator) OUT of the + worker source, so the worker pod does not have to install + packages it never references. Returns an empty string if the module source is unavailable - (interactive sessions, exec/eval contexts, frozen modules). + (interactive sessions, exec/eval contexts, frozen modules) or if no + module-level import binding is referenced by the function body. Refs one-covenant/basilica#477. """ @@ -58,13 +96,31 @@ def _extract_module_level_imports(func: Callable) -> str: tree = ast.parse(module_source) except SyntaxError: return "" + + referenced = _collect_referenced_names(func_source) + if not referenced: + return "" + lines: List[str] = [] for node in tree.body: - if isinstance(node, (ast.Import, ast.ImportFrom)): - try: - lines.append(ast.unparse(node)) - except AttributeError: - lines.append(module_source.splitlines()[node.lineno - 1]) + if isinstance(node, ast.Import): + kept = [ + alias for alias in node.names + if _import_binds_referenced_name(alias, referenced) + ] + if kept: + lines.append(ast.unparse(ast.Import(names=kept))) + elif isinstance(node, ast.ImportFrom): + kept = [ + alias for alias in node.names + if _from_import_binds_referenced_name(alias, referenced) + ] + if kept: + lines.append( + ast.unparse( + ast.ImportFrom(module=node.module, names=kept, level=node.level) + ) + ) if not lines: return "" return "\n".join(lines) + "\n" @@ -416,10 +472,11 @@ def from_function(func: Callable, call: bool = True) -> "SourcePackager": import textwrap source = textwrap.dedent(source) - # Prepend the defining module's top-level imports so names the - # body relies on resolve on the worker pod. Refs - # one-covenant/basilica#477. - imports = _extract_module_level_imports(func) + # Prepend the defining module's top-level imports whose bound + # names the body references, so names the body relies on resolve + # on the worker pod (without dragging in unrelated imports). + # Refs one-covenant/basilica#477. + imports = _extract_module_level_imports(func, source) if imports: source = imports + source diff --git a/crates/basilica-sdk-python/tests/test_decorator_source_extraction.py b/crates/basilica-sdk-python/tests/test_decorator_source_extraction.py index 70b030c8..f8ecbbcb 100644 --- a/crates/basilica-sdk-python/tests/test_decorator_source_extraction.py +++ b/crates/basilica-sdk-python/tests/test_decorator_source_extraction.py @@ -68,6 +68,24 @@ def _train_no_module_imports() -> None: print("hello") +@basilica.distributed( + name="test-dist-unused-filter", + image="ignored:latest", + world_size=WorldSize(min=1, target=1, max=1), + gpu_count=1, + gpu_models=["H100"], + provider_filter=ProviderFilter(include=["hyperstack"]), +) +def _train_uses_os_only() -> None: + """ + Fixture: references only `os` from module scope. The decorator's + captured imports must NOT include `basilica`, `time`, `pytest`, + `Optional` etc. that are imported in this test module but unused + by the body. + """ + print(os.environ.get("X", "fallback")) + + class TestDistributedSourceCapturesModuleImports: """ Pin contract: `@basilica.distributed`'s extracted source must include @@ -117,6 +135,42 @@ def test_extracted_source_executes_without_name_error(self) -> None: exec(compiled, ns) assert "_train_with_module_imports" in ns + def test_extracted_source_filters_unused_module_imports(self) -> None: + """ + The extracted source must NOT include module-level imports that + the body never references. Specifically: a body that only uses + `os` should NOT carry `import basilica`, `import time`, + `import pytest`, `from typing import Optional`, etc. on its + head, because the worker container does not have those + installed and the import would fail at module-eval time. + + Regression: pre-second-fix the decorator captured ALL + module-level imports (e.g. `import basilica`), which caused the + worker pod to raise `ModuleNotFoundError: No module named + 'basilica'` on the runtime path. See task D2 (basilica-backend + #419 Stage 4 take-3) runtime trace. + """ + source = _train_uses_os_only._extract_source() + head = source.split("def ")[0] + assert "import os" in head, ( + f"Expected `import os` (the only referenced module-level " + f"import) in head; got:\n{head!r}" + ) + # These are imported at module level in this test file but the + # function body does not reference them; they must be filtered. + assert "import basilica" not in head, ( + f"Expected `import basilica` filtered out (unused by body); " + f"got:\n{head!r}" + ) + assert "import pytest" not in head, ( + f"Expected `import pytest` filtered out (unused by body); " + f"got:\n{head!r}" + ) + assert "from typing" not in head, ( + f"Expected `from typing import Optional` filtered out " + f"(unused by body); got:\n{head!r}" + ) + # ============================================================================= # @basilica.deployment source extraction (same code path, same bug)