Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions crates/basilica-sdk-python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.29.3] - 2026-05-17

### Fixed
- `@basilica.distributed` / `@basilica.deployment` / `SourcePackager.from_function`
now filter the captured module-level imports down to those whose
bound names are actually referenced by the function body. Without
this filter, the v0.29.2 fix shipped every module-level import to
the worker pod — including `import basilica` and
`from basilica import ...` that are only used by the decorator
itself — which caused the worker to fail with
`ModuleNotFoundError: No module named 'basilica'` at runtime
(`basilica-sdk` is not installed in the trainer image). The filter
uses AST walking of the function body to collect referenced `Name`
/ leftmost `Attribute` identifiers and emits only the matching
imports. Refs #477 follow-up. Cross-repo reference:
`one-covenant/basilica-backend#419` Stage 4 take-3 Cell B runtime
trace.

## [0.29.2] - 2026-05-16

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion crates/basilica-sdk-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "basilica-sdk-python"
version = "0.29.2"
version = "0.29.3"
edition = "2021"
authors = ["Basilica Team"]
description = "Python bindings for the Basilica SDK"
Expand Down
2 changes: 1 addition & 1 deletion crates/basilica-sdk-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "basilica-sdk"
version = "0.29.2"
version = "0.29.3"
description = "Python SDK for deploying containerized applications on the Basilica GPU cloud"
readme = "README.md"
license = { text = "MIT OR Apache-2.0" }
Expand Down
112 changes: 90 additions & 22 deletions crates/basilica-sdk-python/python/basilica/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,48 @@
from basilica._basilica import HealthCheckConfig


def _extract_module_level_imports(func: Callable) -> str:
def _collect_referenced_names(func_source: str) -> set:
"""
Extract module-level `import` and `from ... import ...` statements
from the module that defines `func`.
Walk the AST of `func_source` and collect every free `Name` and the
leftmost identifier of every `Attribute` chain (e.g. `os.environ.get`
contributes `os`). Local assignments and inside-body imports are NOT
excluded — that's fine because the import-prepending logic just
short-circuits on names the body genuinely never mentions.
"""
names: set = set()
try:
tree = ast.parse(func_source)
except SyntaxError:
return names
for node in ast.walk(tree):
if isinstance(node, ast.Name):
names.add(node.id)
elif isinstance(node, ast.Attribute):
# Find leftmost Name in the Attribute chain (e.g. `os` in `os.environ.get`).
cursor: ast.AST = node
while isinstance(cursor, ast.Attribute):
cursor = cursor.value
if isinstance(cursor, ast.Name):
names.add(cursor.id)
return names


def _extract_module_level_imports(func: Callable, func_source: str) -> str:
"""
Extract the subset of module-level `import` / `from ... import ...`
statements from the module that defines `func` whose bound names
are referenced inside `func_source`.

The decorator-based deploy path ships only the function source via
`inspect.getsource(func)`; module-level imports the body references
would otherwise be missing on the worker. Walking the defining
module's AST keeps just the import statements (not arbitrary
top-level state), so the shipped source can be exec'd in a clean
namespace without `NameError`.
would otherwise be missing on the worker. Filtering by actual usage
keeps unrelated module-level imports (e.g. `import basilica` used
only by the decorator) OUT of the worker source, so the worker pod
does not have to install packages it never references.

Returns an empty string if the module source is unavailable
(interactive sessions, exec/eval contexts, frozen modules).
(interactive sessions, exec/eval contexts, frozen modules) or if no
module-level import binding is referenced by the function body.

Refs: one-covenant/basilica#477,
one-covenant/basilica-backend#419 (Stage 4 take-3 Cell B).
Expand All @@ -48,23 +76,61 @@ def _extract_module_level_imports(func: Callable) -> str:
tree = ast.parse(module_source)
except SyntaxError:
return ""

referenced = _collect_referenced_names(func_source)
if not referenced:
return ""

lines: List[str] = []
for node in tree.body:
if isinstance(node, (ast.Import, ast.ImportFrom)):
try:
lines.append(ast.unparse(node))
except AttributeError:
# Python < 3.9 has no `ast.unparse`; SDK requires
# >= 3.10 per pyproject.toml so this branch is unreachable
# at runtime, but keep it defensive.
lines.append(
inspect.getsource(module).splitlines()[node.lineno - 1]
if isinstance(node, ast.Import):
kept = [
alias for alias in node.names
if _import_binds_referenced_name(alias, referenced)
]
if kept:
filtered = ast.Import(names=kept)
lines.append(ast.unparse(filtered))
elif isinstance(node, ast.ImportFrom):
kept = [
alias for alias in node.names
if _from_import_binds_referenced_name(alias, referenced)
]
if kept:
filtered = ast.ImportFrom(
module=node.module, names=kept, level=node.level
)
lines.append(ast.unparse(filtered))
if not lines:
return ""
return "\n".join(lines) + "\n"


def _import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool:
"""
`import foo` binds `foo`. `import foo as bar` binds `bar`.
`import foo.bar` binds the top-level name `foo` (Python module
resolution).
"""
if alias.asname is not None:
return alias.asname in referenced
# `import foo.bar` → binds `foo`.
return alias.name.split(".")[0] in referenced


def _from_import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool:
"""
`from x import foo` binds `foo`. `from x import foo as bar` binds `bar`.
`from x import *` is treated as referenced (we cannot know what names
it exposes without executing the import).
"""
if alias.name == "*":
return True
if alias.asname is not None:
return alias.asname in referenced
return alias.name in referenced


class DeployedFunction:
"""
Wrapper around a decorated function with deployment capabilities.
Expand Down Expand Up @@ -160,10 +226,12 @@ def _extract_source(self) -> str:
func_source = '\n'.join(func_lines)
func_source = textwrap.dedent(func_source)

# Prepend the defining module's top-level imports so names like
# `os`, `time`, `Optional` resolve when the body runs on the
# worker pod. Refs one-covenant/basilica#477.
imports = _extract_module_level_imports(self._func)
# Prepend the defining module's top-level imports whose bound
# names appear in the function body, so references like `os`,
# `time`, `Optional` resolve on the worker pod without dragging
# in unrelated imports (e.g. `import basilica` used only by the
# decorator). Refs one-covenant/basilica#477.
imports = _extract_module_level_imports(self._func, func_source)

# Generate entry point that calls the function
func_name = self._func.__name__
Expand Down Expand Up @@ -393,7 +461,7 @@ def _extract_source(self) -> str:

func_lines = lines[def_idx:]
func_source = textwrap.dedent("\n".join(func_lines))
imports = _extract_module_level_imports(self._func)
imports = _extract_module_level_imports(self._func, func_source)
func_name = self._func.__name__
return f"""{imports}{func_source}

Expand Down
89 changes: 73 additions & 16 deletions crates/basilica-sdk-python/python/basilica/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,55 @@
from .exceptions import SourceError


def _extract_module_level_imports(func: Callable) -> str:
def _collect_referenced_names(func_source: str) -> set:
"""Walk the AST of `func_source` and collect free `Name` references and
the leftmost identifier of every `Attribute` chain (`os` in
`os.environ.get`)."""
names: set = set()
try:
tree = ast.parse(func_source)
except SyntaxError:
return names
for node in ast.walk(tree):
if isinstance(node, ast.Name):
names.add(node.id)
elif isinstance(node, ast.Attribute):
cursor: ast.AST = node
while isinstance(cursor, ast.Attribute):
cursor = cursor.value
if isinstance(cursor, ast.Name):
names.add(cursor.id)
return names


def _import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool:
if alias.asname is not None:
return alias.asname in referenced
return alias.name.split(".")[0] in referenced


def _from_import_binds_referenced_name(alias: ast.alias, referenced: set) -> bool:
if alias.name == "*":
return True
if alias.asname is not None:
return alias.asname in referenced
return alias.name in referenced


def _extract_module_level_imports(func: Callable, func_source: str) -> str:
"""
Extract module-level `import` / `from ... import ...` statements
from the module that defines `func`.
Extract the subset of module-level `import` / `from ... import ...`
statements from the module that defines `func` whose bound names
are referenced inside `func_source`.

Walking the defining module's AST keeps just the import statements
(not arbitrary top-level state), so the shipped source can be exec'd
in a clean namespace without `NameError` on the worker pod.
Filtering by actual usage keeps unrelated module-level imports
(e.g. `import basilica` used only by the decorator) OUT of the
worker source, so the worker pod does not have to install
packages it never references.

Returns an empty string if the module source is unavailable
(interactive sessions, exec/eval contexts, frozen modules).
(interactive sessions, exec/eval contexts, frozen modules) or if no
module-level import binding is referenced by the function body.

Refs one-covenant/basilica#477.
"""
Expand All @@ -58,13 +96,31 @@ def _extract_module_level_imports(func: Callable) -> str:
tree = ast.parse(module_source)
except SyntaxError:
return ""

referenced = _collect_referenced_names(func_source)
if not referenced:
return ""

lines: List[str] = []
for node in tree.body:
if isinstance(node, (ast.Import, ast.ImportFrom)):
try:
lines.append(ast.unparse(node))
except AttributeError:
lines.append(module_source.splitlines()[node.lineno - 1])
if isinstance(node, ast.Import):
kept = [
alias for alias in node.names
if _import_binds_referenced_name(alias, referenced)
]
if kept:
lines.append(ast.unparse(ast.Import(names=kept)))
elif isinstance(node, ast.ImportFrom):
kept = [
alias for alias in node.names
if _from_import_binds_referenced_name(alias, referenced)
]
if kept:
lines.append(
ast.unparse(
ast.ImportFrom(module=node.module, names=kept, level=node.level)
)
)
if not lines:
return ""
return "\n".join(lines) + "\n"
Expand Down Expand Up @@ -416,10 +472,11 @@ def from_function(func: Callable, call: bool = True) -> "SourcePackager":
import textwrap
source = textwrap.dedent(source)

# Prepend the defining module's top-level imports so names the
# body relies on resolve on the worker pod. Refs
# one-covenant/basilica#477.
imports = _extract_module_level_imports(func)
# Prepend the defining module's top-level imports whose bound
# names the body references, so names the body relies on resolve
# on the worker pod (without dragging in unrelated imports).
# Refs one-covenant/basilica#477.
imports = _extract_module_level_imports(func, source)
if imports:
source = imports + source

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,24 @@ def _train_no_module_imports() -> None:
print("hello")


@basilica.distributed(
name="test-dist-unused-filter",
image="ignored:latest",
world_size=WorldSize(min=1, target=1, max=1),
gpu_count=1,
gpu_models=["H100"],
provider_filter=ProviderFilter(include=["hyperstack"]),
)
def _train_uses_os_only() -> None:
"""
Fixture: references only `os` from module scope. The decorator's
captured imports must NOT include `basilica`, `time`, `pytest`,
`Optional` etc. that are imported in this test module but unused
by the body.
"""
print(os.environ.get("X", "fallback"))


class TestDistributedSourceCapturesModuleImports:
"""
Pin contract: `@basilica.distributed`'s extracted source must include
Expand Down Expand Up @@ -117,6 +135,42 @@ def test_extracted_source_executes_without_name_error(self) -> None:
exec(compiled, ns)
assert "_train_with_module_imports" in ns

def test_extracted_source_filters_unused_module_imports(self) -> None:
"""
The extracted source must NOT include module-level imports that
the body never references. Specifically: a body that only uses
`os` should NOT carry `import basilica`, `import time`,
`import pytest`, `from typing import Optional`, etc. on its
head, because the worker container does not have those
installed and the import would fail at module-eval time.

Regression: pre-second-fix the decorator captured ALL
module-level imports (e.g. `import basilica`), which caused the
worker pod to raise `ModuleNotFoundError: No module named
'basilica'` on the runtime path. See task D2 (basilica-backend
#419 Stage 4 take-3) runtime trace.
"""
source = _train_uses_os_only._extract_source()
head = source.split("def ")[0]
assert "import os" in head, (
f"Expected `import os` (the only referenced module-level "
f"import) in head; got:\n{head!r}"
)
# These are imported at module level in this test file but the
# function body does not reference them; they must be filtered.
assert "import basilica" not in head, (
f"Expected `import basilica` filtered out (unused by body); "
f"got:\n{head!r}"
)
assert "import pytest" not in head, (
f"Expected `import pytest` filtered out (unused by body); "
f"got:\n{head!r}"
)
assert "from typing" not in head, (
f"Expected `from typing import Optional` filtered out "
f"(unused by body); got:\n{head!r}"
)


# =============================================================================
# @basilica.deployment source extraction (same code path, same bug)
Expand Down
Loading