diff --git a/.celestoignore.example b/.celestoignore.example new file mode 100644 index 0000000..0378dec --- /dev/null +++ b/.celestoignore.example @@ -0,0 +1,76 @@ +# Celesto Agent Deployment - Example .celestoignore +# +# This file specifies patterns for files and directories to exclude +# from deployment. Patterns use .gitignore syntax. +# +# Copy this to .celestoignore in your agent directory and customize. +# +# Extension to .gitignore: on pattern lines, a space followed by # starts an inline comment. + +# Python +__pycache__/ # Python bytecode cache directory +*.py[cod] # Compiled Python files +*$py.class +*.so # Shared object files +.Python + +# Virtual environments +venv/ # Virtual environment folders +.venv/ +env/ +ENV/ + +# Environment and secrets +.env # Environment variables +.env.* +*.key # Private keys +*.pem # Certificate files +secrets/ # Secrets directory + +# IDE and editors +.vscode/ # VSCode settings +.idea/ # JetBrains IDE settings +*.swp # Vim swap files +*.swo +*~ # Backup files +.DS_Store # macOS metadata + +# Version control +.git/ # Git repository +.gitignore # Git ignore file + +# Dependencies +node_modules/ # Node.js dependencies +vendor/ # Vendor dependencies + +# Build artifacts +dist/ # Distribution files +build/ # Build output +*.egg-info/ # Python package metadata + +# Tests +tests/ # Test directory +test_*/ +*_test.py # Test files +*.test.py + +# Documentation (if not needed at runtime) +docs/ # Documentation directory +*.md # Markdown files + +# Logs and temporary files +*.log # Log files +logs/ # Logs directory +tmp/ # Temporary files +temp/ +*.tmp + +# Data files (if large or sensitive) +data/ # Data directory +*.csv # CSV data files +*.db # Database files +*.sqlite + +# Configuration (if sensitive) +config.local.* # Local configuration overrides +.env.local diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..947264f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,470 @@ +# Celesto SDK - AI Assistant Guide + +This document provides guidance for 
AI assistants working on the Celesto SDK codebase. + +## Project Overview + +**Celesto SDK** is a Python client library and CLI tool for the [Celesto AI platform](https://celesto.ai). It enables developers to: +- Deploy AI agents to managed infrastructure with automatic scaling +- Manage delegated access to user resources (Google Drive, etc.) via GateKeeper +- Interact with the platform through both programmatic SDK and command-line interfaces + +**Current Version:** 0.0.2 +**License:** Apache 2.0 +**Python:** 3.10+ +**API Base:** `https://api.celesto.ai/v1` + +## Repository Structure + +``` +celesto-sdk/ +├── src/celesto/ # Main package +│ ├── sdk/ # SDK implementation +│ │ ├── client.py # Core SDK classes: CelestoSDK, Deployment, GateKeeper +│ │ ├── exceptions.py # Custom exception hierarchy +│ │ ├── types.py # Type definitions +│ │ └── __init__.py # SDK public API exports +│ ├── main.py # CLI entry point (Typer app) +│ ├── deployment.py # CLI deployment commands +│ ├── a2a.py # CLI agent-to-agent commands +│ ├── proxy.py # CLI MCP proxy helper +│ └── __init__.py # Package version and exports +├── tests/ # Test suite +│ ├── test_sdk.py # SDK unit tests +│ └── test_deployment.py # Deployment tests +├── pyproject.toml # Project metadata, dependencies, tooling config +├── README.md # User-facing documentation +├── AGENTS.md # GitHub Copilot instructions +└── LICENSE # Apache 2.0 license +``` + +## Architecture + +### Three-Layer Design + +1. **SDK Layer** ([src/celesto/sdk/client.py](src/celesto/sdk/client.py)) + - `CelestoSDK`: Main client class with context manager support + - `Deployment`: Agent deployment operations + - `GateKeeper`: Delegated access management + - `_BaseConnection`: HTTP session and authentication management + - `_BaseClient`: Shared HTTP request handling and error processing + +2. 
**CLI Layer** ([src/celesto/main.py](src/celesto/main.py)) + - Typer-based CLI application + - Commands: `deploy`, `list`/`ls`, `a2a`, `proxy` + - Rich console output for better UX + +3. **Shared Infrastructure** + - Exception hierarchy in [sdk/exceptions.py](src/celesto/sdk/exceptions.py) + - Type definitions in [sdk/types.py](src/celesto/sdk/types.py) + +## Key Components + +### 1. CelestoSDK Client + +**File:** [src/celesto/sdk/client.py](src/celesto/sdk/client.py) + +The main SDK client provides a unified interface to all Celesto services: + +```python +with CelestoSDK() as client: + # Deployment operations + client.deployment.deploy(...) + client.deployment.list() + + # GateKeeper operations + client.gatekeeper.connect(...) + client.gatekeeper.list_drive_files(...) +``` + +**Important patterns:** +- Context manager for automatic resource cleanup +- API key auto-detection from `CELESTO_API_KEY` environment variable +- Project resolution from `CELESTO_PROJECT_NAME` or first available project +- httpx-based HTTP client with bearer token authentication + +### 2. Deployment API + +**Class:** `Deployment` in [src/celesto/sdk/client.py](src/celesto/sdk/client.py:229) + +**Key methods:** +- `deploy(folder, name, description, envs, project_name)` - Deploy agent from local folder +- `list()` - List all deployments +- `_resolve_project_id(project_name)` - Convert project name to ID +- `_load_ignore_patterns(folder)` - Load `.celestoignore` patterns if present +- `_create_deployment(bundle, name, description, envs, project_id)` - Upload tar.gz bundle + +**Deployment flow:** +1. Resolve project ID (by name or use first available) +2. Load `.celestoignore` patterns if file exists +3. Recursively walk directory tree, filtering ignored files/directories +4. Create tar.gz archive of agent folder (excluding ignored items) +5. Upload as multipart form data with metadata +6. 
Return deployment status (READY or BUILDING) + +**File filtering with .celestoignore:** +- Place a `.celestoignore` file in your agent folder to exclude files from deployment +- Format is identical to `.gitignore` (uses gitignore-style pattern matching) +- Supports patterns like `*.pyc`, `__pycache__/`, `node_modules/`, `.env`, etc. +- Comments (lines starting with `#`) and empty lines are ignored +- Inline comments supported: ` #` (space before `#`) starts a comment; `#` without space is literal (e.g., `file#name`) +- Directories are filtered before recursion for efficiency +- Implementation uses the `pathspec` library with `gitignore` pattern type + +### 3. GateKeeper API + +**Class:** `GateKeeper` in [src/celesto/sdk/client.py](src/celesto/sdk/client.py:404) + +**Key methods:** +- `connect(subject, project_name, provider, redirect_uri)` - Initiate OAuth connection +- `list_connections(project_name, status_filter)` - List all connections +- `revoke_connection(subject, project_name, provider)` - Revoke access +- `list_drive_files(project_name, subject, ...)` - List user's Google Drive files +- `update_access_rules(subject, project_name, allowed_folders, allowed_files)` - Set access restrictions +- `clear_access_rules(connection_id)` - Remove all restrictions + +**Access control model:** +- **Subject**: Unique identifier for end-user (e.g., "user:email@example.com") +- **Connection**: OAuth authorization between subject and provider +- **Access Rules**: Optional restrictions on accessible files/folders +- **Unrestricted**: Default state with full access to user's resources + +### 4. 
Exception Hierarchy + +**File:** [src/celesto/sdk/exceptions.py](src/celesto/sdk/exceptions.py) + +``` +CelestoError (base) +├── CelestoAuthenticationError (401/403) +├── CelestoNotFoundError (404) +├── CelestoValidationError (400/422) +├── CelestoRateLimitError (429) - includes retry_after +├── CelestoServerError (5xx) +└── CelestoNetworkError (connection failures) +``` + +All exceptions include `message` and optional `response` attributes. + +### 5. CLI Commands + +**File:** [src/celesto/main.py](src/celesto/main.py) + +```bash +celesto deploy # Deploy agent (interactive or --folder) +celesto list / ls # List deployments +celesto a2a # Agent-to-agent utilities +celesto proxy # MCP proxy commands +``` + +## Development Setup + +### Prerequisites +- Python 3.10 or higher +- `uv` (recommended) or `pip` + +### Installation + +```bash +# With uv (recommended) +pip install uv +uv venv +uv sync + +# With pip +pip install -e . +``` + +### Running Tests + +```bash +uv run pytest +# or +pytest +``` + +### Code Quality + +```bash +# Linting +uv run ruff check . + +# Formatting +uv run ruff format . + +# Both at once +uv run ruff check . && uv run ruff format . +``` + +## Development Guidelines + +### Code Style + +1. **Formatting:** Ruff (similar to Black) +2. **Import sorting:** isort profile in Ruff +3. **Type hints:** Use throughout, especially in public APIs +4. **Docstrings:** Google-style docstrings for all public methods +5. **Line length:** Follows Ruff defaults + +### Error Handling Patterns + +Always map HTTP status codes to specific exceptions: + +```python +def _handle_response(self, response: httpx.Response) -> Any: + if status in (200, 201, 204): + return response.json() + if status in (401, 403): + raise CelestoAuthenticationError(...) + if status == 404: + raise CelestoNotFoundError(...) + # ... etc +``` + +### Authentication Flow + +1. Check explicit `api_key` parameter +2. Fall back to `CELESTO_API_KEY` environment variable +3. 
Raise `CelestoAuthenticationError` if not found +4. Add to session headers as `Authorization: Bearer ` + +### Project Resolution + +For deployment operations: +1. Check method `project_name` parameter +2. Fall back to `CELESTO_PROJECT_NAME` environment variable +3. If not set, use first project from `/projects/` API +4. Paginate through projects to find match if needed + +### Resource Management + +Always support context manager protocol: + +```python +class CelestoSDK(_BaseConnection): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + self.session.close() +``` + +## Common Tasks + +### Adding a New SDK Method + +1. Add method to appropriate client class (`Deployment` or `GateKeeper`) +2. Use `self._request()` for HTTP calls with proper error handling +3. Add docstring with Args, Returns, Raises, and Example sections +4. Update type hints in [sdk/types.py](src/celesto/sdk/types.py) if needed +5. Add tests in [tests/test_sdk.py](tests/test_sdk.py) + +### Adding a New CLI Command + +1. Add function to appropriate CLI module ([deployment.py](src/celesto/deployment.py), [a2a.py](src/celesto/a2a.py), etc.) +2. Use Typer decorators for arguments/options +3. Import and register in [main.py](src/celesto/main.py) +4. Use Rich for console output +5. 
Handle errors with try/except and user-friendly messages + +### Updating API Endpoints + +The API base URL is configurable: +- Default: `https://api.celesto.ai/v1` +- Override: `CELESTO_BASE_URL` environment variable +- Constructor: `CelestoSDK(base_url="...")` + +When adding new endpoints: +- Use relative paths (e.g., `/deploy/apps`, not full URL) +- Include trailing slashes consistently (see [client.py:260](src/celesto/sdk/client.py#L260)) +- Document query parameters and request body format + +### Working with .celestoignore + +When deploying agents, users can create a `.celestoignore` file to exclude files and directories from deployment: + +**Example .celestoignore:** +```gitignore +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd + +# Virtual environments +venv/ +.venv/ +env/ + +# Environment files +.env +.env.local + +# IDE +.vscode/ +.idea/ +*.swp + +# Dependencies +node_modules/ + +# Build artifacts +dist/ +build/ + +# Tests +tests/ +*.test.py + +# Logs +*.log +logs/ +``` + +**Implementation details:** +- Uses `pathspec` library with `gitignore` pattern matching (same as git) +- Loaded via `_load_ignore_patterns()` method in `Deployment` class +- Directories are filtered before recursion for performance +- Files are checked with forward-slash paths for cross-platform compatibility +- Empty lines and comments (starting with `#`) are automatically filtered out +- Inline comments are supported as an extension to the gitignore format (git itself has no inline comments): ` #` (space before `#`) starts a comment, but `#` without preceding space is literal (e.g., `file#name` matches literally) +- If `.celestoignore` doesn't exist, deployment proceeds without filtering +- If `.celestoignore` can't be read or parsed, a warning is printed to stderr and deployment continues without filtering +- Comprehensive test suite in [tests/test_celestoignore.py](tests/test_celestoignore.py) and [tests/test_celestoignore_spec.py](tests/test_celestoignore_spec.py) + +### Working with Multipart Uploads + +For file uploads (like deployment 
bundles): + +```python +with open(bundle, "rb") as f: + files = {"code_bundle": ("app_bundle.tar.gz", f.read(), "application/gzip")} + return self._request("POST", "/deploy/agent", files=files, data=form_data) +``` + +Note: Don't use `json_body` with `files` - use `data` for form fields. + +## Testing + +### Test Structure + +- [tests/test_sdk.py](tests/test_sdk.py) - SDK client unit tests +- [tests/test_deployment.py](tests/test_deployment.py) - Deployment-specific tests + +### Running Tests + +```bash +# All tests +uv run pytest + +# Specific test file +uv run pytest tests/test_sdk.py + +# With verbose output +uv run pytest -v + +# With coverage +uv run pytest --cov=src/celesto +``` + +### Test Dependencies + +- pytest >= 8.4.1 +- pytest-asyncio >= 0.21.0 (for async tests if needed) + +## Important Notes + +### Breaking Changes + +This is a v0.x SDK, so: +- Public API may change between minor versions +- Always maintain backward compatibility within patch versions +- Document breaking changes in commit messages + +### Security Considerations + +- Never log API keys or sensitive data +- Validate all file paths before operations +- Use tarfile safely (avoid path traversal) +- Don't include credentials in error messages + +### Dependencies + +**Core:** +- httpx >= 0.27.0 (HTTP client) +- typer >= 0.20.0 (CLI framework) +- rich >= 14.0.0 (console output) +- python-dotenv >= 1.0.0 (environment variables) +- pathspec >= 0.11.0 (gitignore-style pattern matching for .celestoignore) +- a2a-sdk >= 0.3.10 (agent-to-agent protocol) +- fastmcp >= 2.7.1 (MCP support) + +**Dev:** +- pytest >= 8.4.1 +- ruff >= 0.12.4 + +### Release Process + +1. Update version in [src/celesto/__init__.py](src/celesto/__init__.py) +2. Update [README.md](README.md) if needed +3. Commit changes +4. Tag release: `git tag v0.0.x` +5. Push to GitHub: `git push origin main --tags` +6. 
Build and publish to PyPI + +## API Reference + +### Deployment Endpoints + +- `POST /deploy/agent` - Deploy new agent (multipart form) +- `GET /deploy/apps` - List deployments +- `GET /projects/` - List projects (with pagination) + +### GateKeeper Endpoints + +- `POST /gatekeeper/connect` - Initiate OAuth connection +- `GET /gatekeeper/connections` - List connections +- `GET /gatekeeper/connections/{id}` - Get connection details +- `DELETE /gatekeeper/connections` - Revoke connection by subject +- `GET /gatekeeper/connectors/drive/files` - List Drive files +- `GET /gatekeeper/connections/{id}/access-rules` - Get access rules +- `PUT /gatekeeper/connections/access-rules` - Update access rules by subject +- `DELETE /gatekeeper/connections/{id}/access-rules` - Clear access rules + +## Troubleshooting + +### Common Issues + +**"API key not found"** +- Set `CELESTO_API_KEY` environment variable +- Or pass `api_key=` parameter to `CelestoSDK()` +- Get key from https://celesto.ai → Settings → Security + +**"Project not found"** +- Set `CELESTO_PROJECT_NAME` environment variable +- Or pass `project_name=` parameter to methods +- Verify project exists at https://celesto.ai + +**Import errors** +- Run `uv sync` or `pip install -e .` +- Check Python version >= 3.10 + +**Tests failing** +- Ensure dependencies are installed +- Check for API changes (mocks may be outdated) +- Run `uv run pytest -v` for detailed output + +## Additional Resources + +- **API Documentation:** https://docs.celesto.ai/celesto-sdk +- **Platform Guide:** https://celesto.ai/docs +- **Repository:** https://github.com/CelestoAI/sdk +- **Issue Tracker:** https://github.com/CelestoAI/sdk/issues +- **PyPI:** https://pypi.org/project/celesto/ + +## Contact + +- **Support:** support@celesto.ai +- **Maintainer:** Aniket Maurya (aniket@celesto.ai) diff --git a/README.md b/README.md index 8165fdd..3dd8cd0 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,38 @@ print(f"Status: {result['status']}") # "READY" or 
"BUILDING" - `envs` (dict, optional): Environment variables for your agent - `project_name` (str, optional): Project to deploy to (defaults to `CELESTO_PROJECT_NAME` or first project) +**Excluding Files with .celestoignore:** + +Create a `.celestoignore` file in your agent folder to exclude files and directories from deployment. The format is identical to `.gitignore`: + +```gitignore +# Python artifacts +__pycache__/ +*.pyc +*.pyo + +# Virtual environments +venv/ +.venv/ + +# Environment files +.env +.env.local + +# Development files +.vscode/ +.git/ + +# Dependencies +node_modules/ + +# Tests +tests/ +*.test.py +``` + +Files and directories matching these patterns will be automatically excluded from the deployment bundle. + #### List Deployments ```python diff --git a/pyproject.toml b/pyproject.toml index 5ee32d1..425188b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dependencies = [ "a2a-sdk>=0.3.10", "fastmcp>=2.7.1", "httpx>=0.27.0", + "pathspec>=0.11.0", "python-dotenv>=1.0.0", "rich>=14.0.0", "typer>=0.20.0", diff --git a/src/celesto/sdk/client.py b/src/celesto/sdk/client.py index e4e2e83..2b59833 100644 --- a/src/celesto/sdk/client.py +++ b/src/celesto/sdk/client.py @@ -1,11 +1,13 @@ import json import os +import sys import tarfile import tempfile from pathlib import Path from typing import Any, List, Literal, Optional import httpx +import pathspec from .exceptions import ( CelestoAuthenticationError, @@ -297,6 +299,55 @@ def _resolve_first_project_id(self) -> str: ) return project_id + def _load_ignore_patterns(self, folder: Path) -> pathspec.PathSpec | None: + """Load ignore patterns from .celestoignore file if it exists. 
+ + Args: + folder: The folder to search for .celestoignore + + Returns: + PathSpec object if .celestoignore exists, None otherwise + """ + ignore_file = folder / ".celestoignore" + if not ignore_file.exists(): + return None + + try: + with open(ignore_file, "r", encoding="utf-8") as f: + patterns = f.read().splitlines() + + # Process patterns according to gitignore spec: + # 1. Lines starting with # (after whitespace) are comments + # 2. Inline comments: # preceded by space (e.g., "pattern # comment") + # 3. # without preceding space is literal (e.g., "file#name") + processed_patterns = [] + for line in patterns: + # Strip inline comments: only ' #' (space followed by #) starts a comment + # Find the first occurrence of ' #' pattern + space_hash_idx = line.find(' #') + if space_hash_idx >= 0: + # Strip from the space before # onwards + line = line[:space_hash_idx] + + # Strip leading/trailing whitespace + line = line.strip() + + # Skip empty lines and full-line comments (lines starting with #) + if not line or line.startswith('#'): + continue + + processed_patterns.append(line) + + return pathspec.PathSpec.from_lines("gitignore", processed_patterns) + except OSError as e: + print(f"Warning: Failed to read .celestoignore file: {e}", file=sys.stderr) + print("Continuing deployment without file filtering.", file=sys.stderr) + return None + except Exception as e: + print(f"Warning: Failed to parse .celestoignore patterns: {e}", file=sys.stderr) + print("Continuing deployment without file filtering.", file=sys.stderr) + return None + def _create_deployment( self, bundle: Path, @@ -339,6 +390,10 @@ def deploy( to Celesto. The folder should contain your agent code and any configuration files (e.g., requirements.txt, Dockerfile). + If a .celestoignore file exists in the folder, files and directories + matching the patterns in that file will be excluded from deployment. + The format is identical to .gitignore. 
+ Args: folder: Path to the folder containing agent code name: Unique name for the deployment @@ -373,11 +428,45 @@ def deploy( else: resolved_project_id = self._resolve_first_project_id() + # Load ignore patterns from .celestoignore if it exists + ignore_spec = self._load_ignore_patterns(folder) + # Create tar.gz archive (Nixpacks expects tar.gz format) with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as temp_file: with tarfile.open(temp_file.name, "w:gz") as tar: - for item in folder.iterdir(): - tar.add(item, arcname=item.name) + # Recursively add all files, respecting .celestoignore patterns + for root, dirs, files in os.walk(folder): + root_path = Path(root) + rel_root = root_path.relative_to(folder) + + # Filter directories in-place to avoid descending into ignored dirs + if ignore_spec: + # Check each directory and remove ignored ones + dirs_to_remove = [] + for d in dirs: + rel_dir = rel_root / d if rel_root != Path(".") else Path(d) + # PathSpec needs forward slashes and trailing slash for dirs + dir_pattern = str(rel_dir).replace("\\", "/") + "/" + if ignore_spec.match_file(dir_pattern): + dirs_to_remove.append(d) + for d in dirs_to_remove: + dirs.remove(d) + + # Add files that aren't ignored + for file in files: + file_path = root_path / file + rel_file = rel_root / file if rel_root != Path(".") else Path(file) + + # Skip if file matches ignore patterns + if ignore_spec: + # PathSpec needs forward slashes + file_pattern = str(rel_file).replace("\\", "/") + if ignore_spec.match_file(file_pattern): + continue + + # Add file to archive with relative path + arcname = str(rel_file).replace("\\", "/") + tar.add(file_path, arcname=arcname) bundle = Path(temp_file.name) try: diff --git a/tests/test_celestoignore.py b/tests/test_celestoignore.py new file mode 100644 index 0000000..508d7e5 --- /dev/null +++ b/tests/test_celestoignore.py @@ -0,0 +1,303 @@ +"""Tests for .celestoignore file handling during deployment.""" + +import os +import tarfile 
+import tempfile +from pathlib import Path + +import pytest + +from celesto.sdk.client import Deployment, _BaseConnection + + +class MockConnection(_BaseConnection): + """Mock connection for testing without requiring API key.""" + + def __init__(self): + super().__init__(api_key="test", base_url="http://test") + self.session = None + + +@pytest.fixture +def deployment(): + """Create a Deployment instance for testing.""" + return Deployment(MockConnection()) + + +def test_comment_lines_are_ignored_in_celestoignore(deployment, tmp_path: Path): + """Test that lines starting with # in .celestoignore are treated as comments.""" + # Create test files including one that starts with # + (tmp_path / "main.py").write_text("print('hello')") + (tmp_path / "#important.py").write_text("# This file starts with #") + (tmp_path / "README.md").write_text("# Documentation") + (tmp_path / "actual_ignore.pyc").write_text("compiled") + + # Create .celestoignore with comments and actual patterns + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """# This is a comment and should be ignored +# Another comment line +*.pyc + +# Yet another comment +# Files starting with # should NOT be excluded by these comment lines +""" + ) + + # Load ignore patterns + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None, "Should load ignore patterns" + + # Test that comment lines don't cause files to be ignored + assert not ignore_spec.match_file("main.py"), "main.py should not be ignored" + assert not ignore_spec.match_file( + "#important.py" + ), "Files starting with # should not be ignored by comment lines" + assert not ignore_spec.match_file("README.md"), "README.md should not be ignored" + + # Test that actual patterns work + assert ignore_spec.match_file( + "actual_ignore.pyc" + ), "*.pyc pattern should match .pyc files" + + +def test_empty_lines_are_ignored_in_celestoignore(deployment, tmp_path: Path): + """Test that empty lines in 
.celestoignore are ignored.""" + # Create test files + (tmp_path / "keep.py").write_text("code") + (tmp_path / "ignore.log").write_text("logs") + + # Create .celestoignore with empty lines + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """ +# Comment + +*.log + + +# Another comment with blank lines above +""" + ) + + # Load ignore patterns + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None + + # Test file matching + assert not ignore_spec.match_file("keep.py"), "keep.py should not be ignored" + assert ignore_spec.match_file("ignore.log"), "ignore.log should be ignored" + + +def test_files_starting_with_hash_are_included_in_deployment( + deployment, tmp_path: Path +): + """Test that files whose names start with # are included in deployment.""" + # Create files including ones starting with # + (tmp_path / "normal.py").write_text("code") + (tmp_path / "#config.yaml").write_text("config") + (tmp_path / "#.hidden").write_text("hidden") + + # Create .celestoignore with only comments (no actual ignore patterns) + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """# This is just a comment +# Another comment +# No actual ignore patterns here +""" + ) + + # Create a tar archive using the same logic as deploy() + ignore_spec = deployment._load_ignore_patterns(tmp_path) + + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as temp_file: + temp_path = Path(temp_file.name) + + try: + with tarfile.open(temp_path, "w:gz") as tar: + for root, dirs, files in os.walk(tmp_path): + root_path = Path(root) + rel_root = root_path.relative_to(tmp_path) + + for file in files: + file_path = root_path / file + rel_file = rel_root / file if rel_root != Path(".") else Path(file) + + # Skip if file matches ignore patterns + if ignore_spec: + file_pattern = str(rel_file).replace("\\", "/") + if ignore_spec.match_file(file_pattern): + continue + + # Add file to archive + arcname = 
str(rel_file).replace("\\", "/") + tar.add(file_path, arcname=arcname) + + # Verify the archive contents + with tarfile.open(temp_path, "r:gz") as tar: + members = tar.getnames() + + # All files should be present (including those starting with #) + assert "normal.py" in members, "normal.py should be in archive" + assert "#config.yaml" in members, "#config.yaml should be in archive" + assert "#.hidden" in members, "#.hidden should be in archive" + assert ".celestoignore" in members, ".celestoignore should be in archive" + + finally: + temp_path.unlink() + + +def test_mixed_comments_and_patterns(deployment, tmp_path: Path): + """Test .celestoignore with mixed comments and actual patterns.""" + # Create test files + (tmp_path / "keep.py").write_text("code") + (tmp_path / "test.pyc").write_text("compiled") + (tmp_path / ".env").write_text("secrets") + (tmp_path / "#note.txt").write_text("note") + Path(tmp_path / "__pycache__").mkdir() + (tmp_path / "__pycache__" / "cache.pyc").write_text("cache") + + # Create .celestoignore with mixed content + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """# Python compilation artifacts +*.pyc +__pycache__/ + +# Environment variables +.env + +# This is a comment about files starting with # +# They should NOT be excluded by this comment +""" + ) + + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None + + # Files that should NOT be ignored + assert not ignore_spec.match_file("keep.py"), "keep.py should not be ignored" + assert not ignore_spec.match_file( + "#note.txt" + ), "#note.txt should not be ignored by comments" + + # Files that SHOULD be ignored + assert ignore_spec.match_file("test.pyc"), "*.pyc should match .pyc files" + assert ignore_spec.match_file(".env"), ".env should be ignored" + assert ignore_spec.match_file( + "__pycache__/cache.pyc" + ), "Files in __pycache__/ should be ignored" + + +def test_celestoignore_does_not_ignore_itself(deployment, tmp_path: 
Path): + """Test that .celestoignore file itself is not ignored.""" + # Create .celestoignore + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text("*.pyc\n") + + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None + + # .celestoignore should not be ignored + assert not ignore_spec.match_file( + ".celestoignore" + ), ".celestoignore should not be ignored" + + +def test_inline_comments_are_supported(deployment, tmp_path: Path): + """Test that inline comments (# after pattern) are properly stripped. + + Inline comments allow users to add explanatory text after patterns. + Everything after # on a line is treated as a comment and ignored. + """ + # Create test files + (tmp_path / "test.pyc").write_text("compiled") + (tmp_path / "keep.py").write_text("code") + (tmp_path / ".env").write_text("secrets") + + # Create .celestoignore with inline comments + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """*.pyc # Python compiled files +.env # Environment variables +# This is a full-line comment +*.log # Log files (but no .log files exist) +""" + ) + + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None + + # Patterns with inline comments should work (comments are stripped) + assert ignore_spec.match_file("test.pyc"), "*.pyc with inline comment should match" + assert ignore_spec.match_file(".env"), ".env with inline comment should match" + + # Files not matching patterns should not be ignored + assert not ignore_spec.match_file("keep.py"), "keep.py should not be ignored" + + +def test_all_files_ignored_creates_valid_archive(deployment, tmp_path: Path): + """Test that ignoring all files creates a valid (but empty) tar archive. + + This verifies graceful handling when .celestoignore patterns match all + files in the deployment folder, resulting in an empty archive. 
+ """ + # Create test files + (tmp_path / "test.pyc").write_text("compiled") + (tmp_path / "test.log").write_text("logs") + (tmp_path / ".env").write_text("secrets") + + # Create .celestoignore that ignores everything + celestoignore = tmp_path / ".celestoignore" + celestoignore.write_text( + """# Ignore all files +* +""" + ) + + # Load ignore patterns + ignore_spec = deployment._load_ignore_patterns(tmp_path) + assert ignore_spec is not None + + # Create tar archive (should be valid but contain no files except .celestoignore) + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as temp_file: + temp_path = Path(temp_file.name) + + try: + with tarfile.open(temp_path, "w:gz") as tar: + for root, dirs, files in os.walk(tmp_path): + root_path = Path(root) + rel_root = root_path.relative_to(tmp_path) + + for file in files: + file_path = root_path / file + rel_file = rel_root / file if rel_root != Path(".") else Path(file) + + # Skip if file matches ignore patterns + if ignore_spec: + file_pattern = str(rel_file).replace("\\", "/") + if ignore_spec.match_file(file_pattern): + continue + + # Add file to archive + arcname = str(rel_file).replace("\\", "/") + tar.add(file_path, arcname=arcname) + + # Verify the archive is valid and check contents + with tarfile.open(temp_path, "r:gz") as tar: + members = tar.getnames() + + # The archive should be valid (can be opened) + # Note: .celestoignore itself won't be in archive because it's matched by * + # In real deployment, this would result in empty or near-empty bundle + assert isinstance( + members, list + ), "Archive should be valid with list of members" + + # Verify that the test files are NOT in the archive + assert "test.pyc" not in members, "test.pyc should be ignored" + assert "test.log" not in members, "test.log should be ignored" + assert ".env" not in members, ".env should be ignored" + + finally: + temp_path.unlink() diff --git a/tests/test_celestoignore_spec.py b/tests/test_celestoignore_spec.py new 
"""Tests for .celestoignore pattern parsing (gitignore-style rules).

The rules exercised here are:
1. Lines starting with # (after optional whitespace) are comments
2. A # elsewhere in the line is literal unless preceded by whitespace (inline comment)
3. Trailing spaces are ignored unless escaped
4. Patterns with # in the middle should match literally

Note: inline comments (rule 2) and whitespace-indented comment lines (part of
rule 1) are Celesto extensions. Git's own gitignore format only treats a line
whose first character is # as a comment.

Whitespace that matters to a test (trailing spaces, tabs, whitespace-only
lines) is written with explicit escapes so that editors or formatters that
strip whitespace cannot silently change what is being tested.
"""

import tarfile
import tempfile
from pathlib import Path

import pytest

from celesto.sdk.client import Deployment, _BaseConnection


class MockConnection(_BaseConnection):
    """Mock connection for testing without requiring a real API key."""

    def __init__(self):
        super().__init__(api_key="test", base_url="http://test")
        # Drop whatever session the base class set up; these tests only parse
        # ignore files. NOTE(review): presumably no HTTP call is made through
        # this object - confirm against _BaseConnection.
        self.session = None


@pytest.fixture
def deployment():
    """Create a Deployment instance backed by the mock connection."""
    return Deployment(MockConnection())


def _load_spec(deployment, root: Path, content: str):
    """Write ``content`` to ``root/.celestoignore`` and return the loaded spec.

    Fails the calling test if no spec is produced.
    """
    (root / ".celestoignore").write_text(content)
    spec = deployment._load_ignore_patterns(root)
    assert spec is not None
    return spec


def test_hash_in_middle_of_pattern_is_literal(deployment, tmp_path: Path):
    """Test that # in the middle of a pattern is treated as literal character."""
    (tmp_path / "file#with#hash.txt").write_text("content")
    (tmp_path / "normal.txt").write_text("content")

    ignore_spec = _load_spec(deployment, tmp_path, "file#with#hash.txt\n")

    assert ignore_spec.match_file(
        "file#with#hash.txt"
    ), "Pattern with # should match literal filename with #"
    assert not ignore_spec.match_file(
        "normal.txt"
    ), "Pattern with # should not match unrelated file"


def test_hash_at_start_after_pattern_is_literal(deployment, tmp_path: Path):
    """Test that a # glued to the preceding text (no space) is literal."""
    (tmp_path / "test#file.txt").write_text("content")
    (tmp_path / "testfile.txt").write_text("content")

    # No whitespace before '#', so it must not start an inline comment.
    ignore_spec = _load_spec(deployment, tmp_path, "test#file.txt\n")

    assert ignore_spec.match_file("test#file.txt"), "Should match file with # in name"
    assert not ignore_spec.match_file(
        "testfile.txt"
    ), "Should not match file without #"


def test_inline_comment_with_space_before_hash(deployment, tmp_path: Path):
    """Test that # preceded by space starts an inline comment."""
    (tmp_path / "test.pyc").write_text("compiled")
    (tmp_path / "test.py").write_text("source")

    ignore_spec = _load_spec(
        deployment, tmp_path, "*.pyc # This is an inline comment\n"
    )

    # The comment is stripped, leaving the bare '*.pyc' pattern.
    assert ignore_spec.match_file("test.pyc"), "Pattern should work with inline comment"
    assert not ignore_spec.match_file("test.py"), "Should not match .py files"


def test_multiple_inline_comments_on_different_lines(deployment, tmp_path: Path):
    """Test multiple patterns with inline comments."""
    (tmp_path / "test.pyc").write_text("compiled")
    (tmp_path / ".env").write_text("secrets")
    (tmp_path / "test.py").write_text("source")

    ignore_spec = _load_spec(
        deployment,
        tmp_path,
        "*.pyc # Python compiled files\n"
        ".env # Environment variables\n"
        "*.log # Log files\n",
    )

    assert ignore_spec.match_file("test.pyc"), "Should match .pyc"
    assert ignore_spec.match_file(".env"), "Should match .env"
    assert not ignore_spec.match_file("test.py"), "Should not match .py"


def test_pattern_ending_with_hash_no_space(deployment, tmp_path: Path):
    """Test pattern ending with # (no space before it) is treated literally."""
    (tmp_path / "file#").write_text("content")
    (tmp_path / "file").write_text("content")

    ignore_spec = _load_spec(deployment, tmp_path, "file#\n")

    assert ignore_spec.match_file("file#"), "Should match file ending with #"
    assert not ignore_spec.match_file("file"), "Should not match file without #"


def test_full_line_comment_variations(deployment, tmp_path: Path):
    """Test that lines starting with # (after whitespace) are comments."""
    (tmp_path / "keep.txt").write_text("content")
    (tmp_path / "#file.txt").write_text("content")

    # Leading whitespace is spelled out with escapes so it cannot be lost.
    ignore_spec = _load_spec(
        deployment,
        tmp_path,
        "# This is a comment\n"
        "  # This is also a comment (leading spaces)\n"
        "\t# This is a comment with tab\n"
        "*.log\n",
    )

    # Comments should not affect pattern matching.
    assert not ignore_spec.match_file("keep.txt"), "Comments shouldn't exclude files"
    assert not ignore_spec.match_file(
        "#file.txt"
    ), "Comments shouldn't exclude files starting with #"
    assert ignore_spec.match_file("test.log"), "Actual patterns should work"


def test_wildcard_with_hash_in_pattern(deployment, tmp_path: Path):
    """Test wildcard patterns containing # character."""
    (tmp_path / "test#1.txt").write_text("content")
    (tmp_path / "test#2.txt").write_text("content")
    (tmp_path / "test-1.txt").write_text("content")

    ignore_spec = _load_spec(deployment, tmp_path, "test#*.txt\n")

    assert ignore_spec.match_file("test#1.txt"), "Should match test#1.txt"
    assert ignore_spec.match_file("test#2.txt"), "Should match test#2.txt"
    assert not ignore_spec.match_file(
        "test-1.txt"
    ), "Should not match file without # in name"


def test_pattern_with_hash_and_inline_comment(deployment, tmp_path: Path):
    """Test pattern containing # that also has an inline comment."""
    (tmp_path / "file#name.txt").write_text("content")
    (tmp_path / "filename.txt").write_text("content")

    # The first # is part of the pattern; the second # (after a space)
    # starts the comment.
    ignore_spec = _load_spec(
        deployment, tmp_path, "file#name.txt # This is an inline comment\n"
    )

    assert ignore_spec.match_file(
        "file#name.txt"
    ), "Should match file with # in name despite inline comment"
    assert not ignore_spec.match_file(
        "filename.txt"
    ), "Should not match file without #"


def test_trailing_spaces_are_ignored(deployment, tmp_path: Path):
    """Test that trailing spaces in patterns are ignored per gitignore spec."""
    (tmp_path / "test.txt").write_text("content")
    (tmp_path / "test.txt ").write_text("content")  # filename with trailing space

    # Three unescaped trailing spaces after the pattern; they must be stripped.
    ignore_spec = _load_spec(deployment, tmp_path, "test.txt   \n")

    assert ignore_spec.match_file("test.txt"), "Should match file without trailing space"


def test_empty_and_whitespace_only_lines(deployment, tmp_path: Path):
    """Test that empty lines and whitespace-only lines are ignored."""
    (tmp_path / "test.pyc").write_text("compiled")

    # A mix of truly empty lines and lines holding only spaces and/or tabs
    # around the single real pattern.
    ignore_spec = _load_spec(
        deployment,
        tmp_path,
        "\n"
        "   \n"
        "*.pyc\n"
        "\t\n"
        " \t \n"
        "\n",
    )

    assert ignore_spec.match_file(
        "test.pyc"
    ), "Pattern should work despite empty lines"


def test_negation_pattern_with_inline_comment(deployment, tmp_path: Path):
    """Test negation patterns (!) with inline comments."""
    (tmp_path / "test.log").write_text("log")
    (tmp_path / "important.log").write_text("important")

    # The later negation re-includes one file excluded by the earlier glob.
    ignore_spec = _load_spec(
        deployment,
        tmp_path,
        "*.log # Ignore all logs\n"
        "!important.log # But not this one\n",
    )

    assert ignore_spec.match_file("test.log"), "Should match .log files"
    assert not ignore_spec.match_file(
        "important.log"
    ), "Should not match negated pattern"