diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1607eef..88e2ddf 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -9,8 +9,10 @@ "ghcr.io/devcontainers-contrib/features/uv:1": {} }, - // Install all deps (including cli + dev extras) and set up git hooks - "postCreateCommand": "uv sync --all-extras && pre-commit install", + "postCreateCommand": { + "python": "uv sync --all-extras && pre-commit install", + "claude": "curl -fsSL https://claude.ai/install.sh | bash" + }, // Local MCP dev server "forwardPorts": [8000], diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f059fa3..818e66c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,8 +12,8 @@ on: - develop jobs: - code-quality: - name: Code quality + lint: + name: Lint & format runs-on: ubuntu-latest steps: @@ -37,9 +37,6 @@ jobs: - name: Ruff lint run: uv run ruff check core/ plugins/ server/ tests/ - - name: Audit dependencies for known CVEs - run: uv run pip-audit -r requirements.txt - - name: Set up Go 1.21 uses: actions/setup-go@v5 with: @@ -55,8 +52,35 @@ jobs: exit 1 fi - test-suite: + security: + name: Security audit + needs: lint + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install dependencies + run: uv sync --all-extras + + - name: Audit dependencies for known CVEs + run: uv run pip-audit -r requirements.txt + + test: name: Test suite + needs: lint runs-on: ubuntu-latest steps: diff --git a/.github/workflows/infra.yml b/.github/workflows/infra.yml index 25f9f67..3b20c6b 100644 --- a/.github/workflows/infra.yml +++ b/.github/workflows/infra.yml @@ -14,28 +14,14 @@ on: - "terraform/**" jobs: - terraform-validate: - name: 
Terraform validate + terraform-lint: + name: Terraform lint & validate runs-on: ubuntu-latest - env: - TF_PLUGIN_CACHE_DIR: ${{ runner.temp }}/.terraform.d/plugin-cache steps: - name: Checkout code uses: actions/checkout@v4 - - name: Cache Terraform providers - uses: actions/cache@v4 - with: - path: ${{ runner.temp }}/.terraform.d/plugin-cache - key: ${{ runner.os }}-terraform-${{ hashFiles('terraform/**/.terraform.lock.hcl') }} - - - name: Cache Terraform init - uses: actions/cache@v4 - with: - path: terraform/aws/.terraform - key: ${{ runner.os }}-tf-init-${{ hashFiles('terraform/**/.terraform.lock.hcl') }} - - name: Set up Terraform uses: hashicorp/setup-terraform@v3 @@ -43,29 +29,48 @@ jobs: working-directory: ./terraform run: | if ! terraform fmt -check -recursive; then - echo "Terraform files are not properly formatted." + echo "::error::Terraform files are not properly formatted." echo "Run 'terraform fmt -recursive' locally and commit the result." exit 1 fi - - name: Terraform init (no backend) - working-directory: ./terraform/aws - run: terraform init -backend=false - - - name: Create placeholder Lambda zip for validation - working-directory: ./terraform/aws + - name: Create placeholder artifacts for validation run: | - python3 -c "import zipfile; zipfile.ZipFile('lambda-deployment.zip', 'w').close()" + python3 -c "import zipfile; zipfile.ZipFile('terraform/aws/lambda-deployment.zip', 'w').close()" - - name: Terraform validate - working-directory: ./terraform/aws - run: terraform validate + - name: Validate all Terraform directories + run: | + failed=0 + for dir in terraform/*/; do + if ls "$dir"*.tf 1>/dev/null 2>&1; then + echo "--- Validating $dir ---" + terraform -chdir="$dir" init -backend=false -input=false + if ! 
terraform -chdir="$dir" validate; then + echo "::error::Validation failed in $dir" + failed=1 + fi + else + echo "--- Skipping $dir (no .tf files) ---" + fi + done + exit $failed - name: Set up TFLint uses: terraform-linters/setup-tflint@v4 - - name: Run TFLint - run: tflint --chdir=terraform/aws + - name: Run TFLint on all directories + run: | + failed=0 + for dir in terraform/*/; do + if ls "$dir"*.tf 1>/dev/null 2>&1; then + echo "--- TFLint: $dir ---" + if ! tflint --chdir="$dir"; then + echo "::warning::TFLint issues in $dir" + failed=1 + fi + fi + done + exit $failed - name: Run tfsec uses: aquasecurity/tfsec-action@v1.0.3 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2e5c4f9..d7bde99 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,8 +4,8 @@ name: Release on: push: tags: - - "v*.*.*" # Triggers on version tags like v1.0.0, v2.1.3, etc. - workflow_dispatch: # Allows manual triggering + - "v*.*.*" + workflow_dispatch: inputs: version: description: "Version tag (e.g., v1.0.0)" @@ -17,11 +17,68 @@ env: GO_VERSION: "1.21" jobs: + validate: + name: Pre-release validation + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install dependencies + run: uv sync --all-extras + + - name: Ruff lint + run: uv run ruff check core/ plugins/ server/ tests/ + + - name: Audit dependencies for known CVEs + run: uv run pip-audit -r requirements.txt + + - name: Run Python tests with coverage + run: | + uv run pytest tests/ \ + -n auto \ + --cov=core \ + --cov=plugins \ + --cov-report=term-missing \ + --cov-fail-under=80 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Check Go formatting + 
working-directory: ./client + run: | + UNFORMATTED=$(gofmt -l .) + if [ -n "$UNFORMATTED" ]; then + echo "The following files need formatting:" + echo "$UNFORMATTED" + exit 1 + fi + + - name: Run Go tests + working-directory: ./client + run: go test ./... + build: name: Build binaries + needs: validate runs-on: ubuntu-latest permissions: - contents: write # Required to create releases and upload assets + contents: write strategy: matrix: @@ -46,7 +103,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - fetch-depth: 0 # Fetch all history for proper versioning + fetch-depth: 0 - name: Set up Go uses: actions/setup-go@v5 @@ -84,6 +141,7 @@ jobs: build-lambda-zip: name: Build Lambda ZIP + needs: validate runs-on: ubuntu-latest permissions: contents: write @@ -172,7 +230,6 @@ jobs: run: | cd artifacts echo "=== Flattening artifacts ===" - # Find all release files (binaries + lambda zip) in subdirectories and move to root find . -mindepth 2 -type f \( -name "opencontext-client-*" -o -name "opencontext-lambda-*" \) | while read file; do filename=$(basename "$file") dirname=$(dirname "$file") @@ -182,7 +239,6 @@ jobs: rmdir "$dirname" 2>/dev/null || true mv "$tempname" "$filename" done - # Remove any remaining empty subdirectories find . 
-mindepth 1 -type d -empty -delete echo "=== Flattening complete ===" diff --git a/.gitignore b/.gitignore index 6f97539..0605d0e 100644 --- a/.gitignore +++ b/.gitignore @@ -231,3 +231,4 @@ examples/ # Terraform variable files (may contain real secrets — use *.tfvars.example as templates) terraform/**/*.tfvars +.claude/ diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 0fd6df9..0d2ea57 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -38,7 +38,7 @@ { "label": "Dev: start local server", "type": "shell", - "command": "python3 scripts/local_server.py", + "command": "opencontext serve", "presentation": { "reveal": "always", "panel": "dedicated" diff --git a/CLAUDE.md b/CLAUDE.md index c5ead3d..55179c0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,10 +18,10 @@ pre-commit install # set up git hooks ```bash # Run local dev server (http://localhost:8000/mcp) -python3 scripts/local_server.py +opencontext serve # Test the running server -./scripts/test_streamable_http.sh +opencontext test --url http://localhost:8000/mcp # Tests pytest tests/ -n auto --cov=core --cov=plugins --cov-fail-under=80 @@ -31,10 +31,13 @@ ruff check core/ plugins/ server/ tests/ --fix --unsafe-fixes ruff format core/ plugins/ server/ tests/ # CLI -opencontext validate # validate config.yaml +opencontext validate --env staging # validate config + Terraform before deploy opencontext deploy --env staging -opencontext status -opencontext logs +opencontext status --env staging +opencontext logs --env staging +opencontext plugin list # list enabled/disabled plugins +opencontext security # pip-audit vulnerability scan +opencontext architecture # print AWS infra diagram in terminal ``` ## Project Layout @@ -45,7 +48,7 @@ plugins/ # Built-in plugins: ckan/, arcgis/, socrata/ custom_plugins/ # Drop user plugins here — auto-discovered at startup cli/ # Typer CLI (opencontext command) server/ # HTTP adapters: local aiohttp + AWS Lambda entry point -scripts/ # local_server.py, deploy.sh, 
test_streamable_http.sh +server/adapters/ # local aiohttp dev server + AWS Lambda entry point tests/ # pytest suite (80% coverage required) terraform/aws/ # Lambda + API Gateway + IAM IaC examples/ # Per-city config.yaml examples (Boston, Chicago, Seattle, etc.) @@ -58,7 +61,7 @@ Key files: - `core/plugin_manager.py` — discovery, loading, one-plugin enforcement - `core/validators.py` — config validation; enforces the one-plugin rule - `server/adapters/aws_lambda.py` — Lambda entry point -- `scripts/local_server.py` — aiohttp dev server +- `cli/commands/serve.py` — aiohttp dev server (started via `opencontext serve`) ## Plugin System @@ -115,6 +118,6 @@ uv run pytest tests/ -n auto --cov=core --cov=plugins --cov-fail-under=80 - **Tool prefix required** → call `ckan__search_datasets`, not `search_datasets`. - **`config.yaml` is gitignored** → changes to it won't be committed. Use `config-example.yaml` for template changes. - **Coverage < 80%** → CI fails. New code needs tests; check gaps with `--cov-report=html`. -- **Lambda size limit** → 250 MB max. `scripts/deploy.sh` validates before packaging. +- **Lambda size limit** → 250 MB max. `opencontext deploy` validates package size before uploading. - **Python 3.11+ required** → match this in any new tooling or containers. - **Go client** (`client/`) is an optional stdio-to-HTTP bridge for tools that only speak stdio MCP. diff --git a/README.md b/README.md index 67bed85..dea08bd 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ opencontext authenticate opencontext configure # 3. Test locally (optional) -python3 scripts/local_server.py +opencontext serve # 4. Deploy opencontext deploy --env staging @@ -47,7 +47,10 @@ See [Getting Started](docs/GETTING_STARTED.md) for full setup. 
| Doc | Description | | ------------------------------------------ | ----------------------------------------------- | | [Getting Started](docs/GETTING_STARTED.md) | Setup and usage | +| [CLI Reference](docs/CLI.md) | All CLI commands and flags | | [Architecture](docs/ARCHITECTURE.md) | System design and plugins | +| [Built-in Plugins](docs/BUILT_IN_PLUGINS.md) | CKAN, ArcGIS Hub, Socrata plugin details | +| [Custom Plugins](docs/CUSTOM_PLUGINS.md) | How to write your own plugin | | [Deployment](docs/DEPLOYMENT.md) | AWS, Terraform, monitoring | | [Testing](docs/TESTING.md) | Local testing (Terminal, Claude, MCP Inspector) | diff --git a/cli/commands/configure.py b/cli/commands/configure.py index c57ed39..7f4c5cb 100644 --- a/cli/commands/configure.py +++ b/cli/commands/configure.py @@ -3,6 +3,8 @@ import subprocess from pathlib import Path +import boto3 +import botocore.exceptions import questionary import typer import yaml @@ -18,6 +20,67 @@ PLUGINS = ["CKAN", "Socrata", "ArcGIS"] +# Bucket name must match the `backend "s3"` block in terraform/aws/main.tf. +TERRAFORM_STATE_BUCKET = "opencontext-terraform-state" + + +def _ensure_state_bucket(bucket_name: str, region: str) -> None: + """Check that the Terraform S3 state bucket exists; create it if not. + + Versioning and server-side encryption are enabled on newly created buckets. + No DynamoDB table is created — the Terraform backend does not use state + locking. + """ + s3 = boto3.client("s3", region_name=region) + + try: + s3.head_bucket(Bucket=bucket_name) + console.print( + f"[dim]Terraform state bucket [bold]{bucket_name}[/bold] already exists.[/dim]" + ) + return + except botocore.exceptions.ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code not in ("404", "NoSuchBucket"): + raise + + # Bucket does not exist — create it. + console.print( + f"[yellow]Terraform state bucket [bold]{bucket_name}[/bold] not found. 
Creating...[/yellow]" + ) + + if region == "us-east-1": + # us-east-1 does not accept a LocationConstraint. + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={"LocationConstraint": region}, + ) + + s3.put_bucket_versioning( + Bucket=bucket_name, + VersioningConfiguration={"Status": "Enabled"}, + ) + + s3.put_bucket_encryption( + Bucket=bucket_name, + ServerSideEncryptionConfiguration={ + "Rules": [ + { + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "AES256", + } + } + ] + }, + ) + + console.print( + f"[green]Created S3 bucket [bold]{bucket_name}[/bold] " + f"(region: {region}, versioning: enabled, encryption: AES256).[/green]" + ) + def _load_example_defaults(project_root: Path) -> dict: example = project_root / "config-example.yaml" @@ -35,14 +98,20 @@ def _prompt_plugin_config(plugin: str, defaults: dict) -> dict: cfg: dict = {"enabled": True} if plugin == "CKAN": - cfg["base_url"] = (questionary.text( - "CKAN API base URL:", - default=plugin_defaults.get("base_url", "https://data.example.gov"), - ).ask() or "").rstrip("/") - cfg["portal_url"] = (questionary.text( - "CKAN public portal URL:", - default=plugin_defaults.get("portal_url", cfg["base_url"]), - ).ask() or "").rstrip("/") + cfg["base_url"] = ( + questionary.text( + "CKAN API base URL:", + default=plugin_defaults.get("base_url", "https://data.example.gov"), + ).ask() + or "" + ).rstrip("/") + cfg["portal_url"] = ( + questionary.text( + "CKAN public portal URL:", + default=plugin_defaults.get("portal_url", cfg["base_url"]), + ).ask() + or "" + ).rstrip("/") cfg["city_name"] = questionary.text( "City name (for display):", default=plugin_defaults.get("city_name", "Your City"), @@ -54,10 +123,13 @@ def _prompt_plugin_config(plugin: str, defaults: dict) -> dict: cfg["timeout"] = int(timeout) elif plugin == "Socrata": - cfg["base_url"] = (questionary.text( - "Socrata base URL:", - default=plugin_defaults.get("base_url", 
"https://data.example.gov"), - ).ask() or "").rstrip("/") + cfg["base_url"] = ( + questionary.text( + "Socrata base URL:", + default=plugin_defaults.get("base_url", "https://data.example.gov"), + ).ask() + or "" + ).rstrip("/") app_token = questionary.text( "Socrata app token (optional, press Enter to skip):", default=plugin_defaults.get("app_token", ""), @@ -71,10 +143,13 @@ def _prompt_plugin_config(plugin: str, defaults: dict) -> dict: cfg["timeout"] = int(timeout) elif plugin == "ArcGIS": - cfg["portal_url"] = (questionary.text( - "ArcGIS Hub portal URL:", - default=plugin_defaults.get("portal_url", "https://hub.arcgis.com"), - ).ask() or "").rstrip("/") + cfg["portal_url"] = ( + questionary.text( + "ArcGIS Hub portal URL:", + default=plugin_defaults.get("portal_url", "https://hub.arcgis.com"), + ).ask() + or "" + ).rstrip("/") cfg["city_name"] = questionary.text( "City name (for display):", default=plugin_defaults.get("city_name", "Your City"), @@ -123,8 +198,19 @@ def _write_tfvars( @friendly_exit -def configure() -> None: +def configure( + state_bucket: str = typer.Option( + TERRAFORM_STATE_BUCKET, + "--state-bucket", + help="S3 bucket name for Terraform state (default: opencontext-terraform-state)", + ), +) -> None: """Interactive wizard to configure your OpenContext MCP server.""" + # When called programmatically (e.g. in tests), Typer does not resolve + # Option defaults — guard against receiving the raw OptionInfo sentinel. + if not isinstance(state_bucket, str): + state_bucket = TERRAFORM_STATE_BUCKET + project_root = get_project_root() terraform_dir = get_terraform_dir() @@ -262,6 +348,23 @@ def configure() -> None: # Terraform workspace ws_name = f"{city_slug}-{env}" + _ensure_state_bucket(state_bucket, region) + + # Override the backend config at init time so Terraform uses the correct + # bucket and region instead of the defaults hard-coded in main.tf. 
+ if not (terraform_dir / ".terraform").exists(): + init_cmd = [ + "terraform", + "init", + f"-backend-config=bucket={state_bucket}", + f"-backend-config=region={region}", + ] + run_cmd( + init_cmd, + cwd=terraform_dir, + spinner_msg="Initializing Terraform", + ) + result = subprocess.run( ["terraform", "workspace", "list"], cwd=terraform_dir, @@ -284,14 +387,6 @@ def configure() -> None: spinner_msg=f"Creating workspace [bold]{ws_name}[/bold]", ) - # Terraform init if needed - if not (terraform_dir / ".terraform").exists(): - run_cmd( - ["terraform", "init"], - cwd=terraform_dir, - spinner_msg="Initializing Terraform", - ) - # Print summary summary = Table(title="Configuration Summary", show_lines=True) summary.add_column("Setting", style="bold") diff --git a/cli/commands/serve.py b/cli/commands/serve.py new file mode 100644 index 0000000..b3bda73 --- /dev/null +++ b/cli/commands/serve.py @@ -0,0 +1,228 @@ +"""CLI command: opencontext serve — run the local dev MCP server.""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import time +import uuid +from pathlib import Path + +import typer +import yaml +from aiohttp import web + +from cli.utils import console +from core.logging_utils import configure_json_logging +from core.mcp_server import MCPServer +from core.plugin_manager import PluginManager +from core.validators import get_logging_config + +app = typer.Typer() + +logger = logging.getLogger(__name__) + + +def _load_config(config_path: str) -> tuple[dict, Path]: + """Load YAML config from *config_path*, raising a clear error if missing.""" + resolved = Path(config_path).resolve() + if not resolved.exists(): + console.print(f"[red]Config file not found:[/red] {resolved}") + raise typer.Exit(1) + with open(resolved) as f: + return yaml.safe_load(f), resolved + + +def _derive_server_name(config: dict) -> str: + """Derive a short server name from the active plugin config.""" + if "plugins" in config: + for 
plugin_name, plugin_config in config["plugins"].items(): + if not isinstance(plugin_config, dict): + continue + if plugin_config.get("enabled"): + if "city_name" in plugin_config: + city = plugin_config["city_name"].lower().replace(" ", "-") + return f"{city}-opendata" + if "organization" in plugin_config: + org = plugin_config["organization"].lower().replace(" ", "-") + return f"{org}-opendata" + + if "aws" in config and "lambda_name" in config["aws"]: + return config["aws"]["lambda_name"].replace("-mcp", "") + + if "server_name" in config: + return config["server_name"].lower().replace(" ", "-").replace("'", "") + + return "opencontext-mcp" + + +async def _run_server(config: dict, port: int) -> None: + """Initialise the plugin manager and MCP server, then serve until Ctrl+C.""" + # Configure JSON logging with pretty output for local dev + logging_config = get_logging_config(config) + configure_json_logging( + level=logging_config.get("level", "INFO"), + pretty=True, + ) + + console.print("Initializing OpenContext MCP Server locally...") + + plugin_manager = PluginManager(config) + await plugin_manager.load_plugins() + + mcp_server = MCPServer(plugin_manager) + + console.print("Server initialized successfully") + console.print(f"Loaded plugins: {list(plugin_manager.plugins.keys())}") + console.print(f"Available tools: {len(plugin_manager.get_all_tools())}") + + async def handle_mcp_request(request: web.Request) -> web.Response: + start_time = time.perf_counter() + try: + body = await request.text() + headers = dict(request.headers) + + session_id = headers.get("mcp-session-id") or headers.get("Mcp-Session-Id") + + try: + request_json = json.loads(body) + method = request_json.get("method", "unknown") + tool_name = None + tool_args = None + if method == "tools/call": + params = request_json.get("params", {}) + tool_name = params.get("name") + tool_args = params.get("arguments", {}) + except (json.JSONDecodeError, AttributeError): + method = "unknown" + tool_name = 
None + tool_args = None + + logger.info( + "Incoming MCP request", + extra={ + "session_id": session_id, + "method": method, + "tool_name": tool_name, + "tool_arguments": tool_args if tool_args else None, + }, + ) + + is_initialize = method == "initialize" + session_id_to_return = None + if is_initialize: + session_id_to_return = str(uuid.uuid4()) + logger.info( + f"Initialize request detected, generating session ID: {session_id_to_return}" + ) + + response = await mcp_server.handle_http_request(body, headers) + + response_headers = dict(response.get("headers", {})) + if session_id_to_return: + response_headers["Mcp-Session-Id"] = session_id_to_return + + duration_ms = (time.perf_counter() - start_time) * 1000 + logger.info( + "MCP request processed", + extra={ + "session_id": session_id_to_return or session_id, + "method": method, + "tool_name": tool_name, + "duration_ms": round(duration_ms, 2), + "status_code": response.get("statusCode", 200), + }, + ) + + return web.Response( + text=response.get("body", "{}"), + status=response.get("statusCode", 200), + headers=response_headers, + ) + + except Exception as e: + duration_ms = (time.perf_counter() - start_time) * 1000 + logger.error( + f"Error processing MCP request: {e}", + extra={"duration_ms": round(duration_ms, 2)}, + exc_info=True, + ) + return web.Response( + text=json.dumps( + { + "jsonrpc": "2.0", + "id": None, + "error": {"code": -32603, "message": str(e)}, + } + ), + status=500, + headers={"Content-Type": "application/json"}, + ) + + aiohttp_app = web.Application() + aiohttp_app.router.add_post("/mcp", handle_mcp_request) + + runner = web.AppRunner(aiohttp_app) + await runner.setup() + site = web.TCPSite(runner, "localhost", port) + await site.start() + + server_name = _derive_server_name(config) + base_url = f"http://localhost:{port}/mcp" + + console.print("\n" + "=" * 50) + console.print("Local MCP Server running!") + console.print("=" * 50) + console.print(f"URL: {base_url}") + console.print("\n" + 
"=" * 50) + console.print("Connect via Claude Connectors") + console.print("=" * 50) + console.print( + "\n1. Go to Settings -> Connectors (or Customize -> Connectors on claude.ai)" + ) + console.print("2. Click 'Add custom connector'") + console.print(f"3. Enter a name ({server_name}) and URL: {base_url}") + console.print( + "\nNote: Localhost works with Claude Desktop only (web needs a deployed URL)." + ) + console.print("\n" + "=" * 50) + console.print("\nTest with:") + console.print(f" opencontext test --url {base_url}") + console.print( + f" or curl -X POST {base_url}" + " -H 'Content-Type: application/json'" + ' -d \'{"jsonrpc":"2.0","id":1,"method":"ping"}\'' + ) + console.print("\nPress Ctrl+C to stop") + console.print("=" * 50 + "\n") + + try: + await asyncio.Event().wait() + except KeyboardInterrupt: + console.print("\nShutting down...") + await plugin_manager.shutdown() + console.print("Server stopped.") + + +@app.callback(invoke_without_command=True) +def serve( + ctx: typer.Context, + port: int = typer.Option(8000, help="Port to listen on (default: 8000)"), + config: str = typer.Option( + "", + help="Path to config.yaml. 
Overrides OPENCONTEXT_CONFIG env var.", + ), +) -> None: + """Run the OpenContext MCP server locally for development and testing.""" + if ctx.invoked_subcommand is not None: + return + + # Resolve config path: --config flag > OPENCONTEXT_CONFIG env var > default + config_path = config or os.environ.get("OPENCONTEXT_CONFIG", "") or "config.yaml" + + loaded_config, resolved_path = _load_config(config_path) + console.print(f"Using config: {resolved_path}") + + asyncio.run(_run_server(loaded_config, port)) diff --git a/cli/main.py b/cli/main.py index 65dcb49..318b69b 100644 --- a/cli/main.py +++ b/cli/main.py @@ -10,6 +10,7 @@ from cli.commands.logs import logs from cli.commands.plugin import plugin_app from cli.commands.security import app as security_app +from cli.commands.serve import app as serve_app from cli.commands.status import status from cli.commands.test import app as test_app from cli.commands.upgrade import app as upgrade_app @@ -31,6 +32,7 @@ app.command()(logs) app.command()(architecture) +app.add_typer(serve_app, name="serve") app.add_typer(validate_app, name="validate") app.add_typer(test_app, name="test") app.add_typer(upgrade_app, name="upgrade") diff --git a/config-example.yaml b/config-example.yaml index 0582253..55cbe68 100644 --- a/config-example.yaml +++ b/config-example.yaml @@ -52,6 +52,16 @@ plugins: timeout: 120 # token: "${ARCGIS_TOKEN}" # Optional: Bearer token for private items + # Built-in: Socrata (for Socrata open data portals) + # Examples: data.cityofchicago.org, data.cityofnewyork.us, data.seattle.gov + socrata: + enabled: false # Set to true to use + base_url: "https://data.example.gov" # Socrata portal base URL + portal_url: "https://data.example.gov" # Public portal URL + city_name: "Your City" + timeout: 30 + # app_token: "${SOCRATA_APP_TOKEN}" # Recommended; register at dev.socrata.com/register + # yamllint disable rule:comments-indentation # Custom: Add your own plugins here # Example: diff --git a/core/validators.py 
b/core/validators.py index 4126c11..95f8d39 100644 --- a/core/validators.py +++ b/core/validators.py @@ -69,7 +69,7 @@ def validate_plugin_count(config: Dict[str, Any]) -> Tuple[List[str], int]: f" Fork #1: Enable {enabled_plugins[0]} only\n" f" Fork #2: Enable {enabled_plugins[1]} only\n\n" f" 3. Deploy each fork separately\n" - f" ./scripts/deploy.sh (in each fork)\n\n" + f" opencontext deploy --env (in each fork)\n\n" f"See docs/ARCHITECTURE.md for details." ) diff --git a/custom_plugins/template/plugin_template.py b/custom_plugins/template/plugin_template.py index 0c30f5c..35ca866 100644 --- a/custom_plugins/template/plugin_template.py +++ b/custom_plugins/template/plugin_template.py @@ -95,7 +95,7 @@ def get_tools(self) -> List[ToolDefinition]: """Get list of tools provided by this plugin. Tool names should NOT include the plugin prefix (e.g., use "search" - not "my_custom_plugin.search"). The Plugin Manager will add the prefix. + not "my_custom_plugin__search"). The Plugin Manager will add the prefix. Returns: List of tool definitions diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index c65dd7f..405631b 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -7,8 +7,8 @@ OpenContext is a plugin-based framework. Each deployment runs **one** server wit ## One Fork = One Server **Enforcement:** -- `scripts/deploy.sh` validates config before deployment -- `plugin_manager.py` fails if multiple plugins are enabled +- `opencontext deploy` validates config before deployment +- `plugin_manager.py` fails at startup if multiple plugins are enabled **Multiple servers:** Fork again per plugin, deploy each separately. 
@@ -27,11 +27,24 @@ server/ │ └── aws_lambda.py # Lambda handler entry point └── http_handler.py # HTTP request handling -plugins/ # Built-in (CKAN) -├── ckan/ +plugins/ # Built-in plugins +├── ckan/ # CKAN open data portals │ ├── plugin.py │ ├── config_schema.py │ └── sql_validator.py +├── arcgis/ # ArcGIS Hub portals +│ ├── plugin.py +│ ├── config_schema.py +│ └── where_validator.py +└── socrata/ # Socrata open data portals + ├── plugin.py + ├── config_schema.py + └── soql_validator.py + +cli/ # Typer CLI (opencontext command) +├── main.py # Command registration +├── commands/ # One file per command group +└── utils.py # Shared helpers custom_plugins/ # User plugins (auto-discovered) ├── template/ @@ -50,14 +63,19 @@ tests/ # Unit tests ### Request Flow ``` -Claude Desktop / App - → stdio bridge (npx) or Go client -Lambda / Local Server - → server.adapters.aws_lambda or local_server.py +Claude / MCP Client + → Claude Connectors (HTTPS) or Go stdio client +API Gateway (REST, Regional) + → Lambda (server.adapters.aws_lambda.lambda_handler) → MCP Server (core/mcp_server.py) - → Plugin Manager - → Plugin (e.g., CKAN) + → Plugin Manager (core/plugin_manager.py) + → Plugin (CKAN / ArcGIS / Socrata / custom) → External API + +Logs & traces: + Lambda → CloudWatch Logs (/aws/lambda/) + Lambda → X-Ray (active tracing) + Failed async invocations → SQS Dead Letter Queue ``` ## Plugins @@ -91,7 +109,60 @@ plugins: | `ckan__get_schema(resource_id)` | Get schema for a resource | | `ckan__execute_sql(sql)` | Execute PostgreSQL SELECT queries (advanced) | -**SQL execution:** The `execute_sql` tool allows complex PostgreSQL queries (CTEs, window functions, joins). Only SELECT is allowed. INSERT, UPDATE, DELETE, DROP, and other destructive operations are blocked. Resource IDs must be valid UUIDs in double quotes: `FROM "uuid-here"`. See [CKAN API docs](https://docs.ckan.org/en/latest/api/) for details. +**SQL execution:** Only SELECT is allowed. 
Resource IDs must be valid UUIDs in double quotes: `FROM "uuid-here"`. See [CKAN API docs](https://docs.ckan.org/en/latest/api/) for details. + +### Built-in: ArcGIS Hub + +For ArcGIS Hub open data portals (e.g., hub.arcgis.com, data-yourcity.hub.arcgis.com). + +**Configuration:** + +```yaml +plugins: + arcgis: + enabled: true + portal_url: "https://hub.arcgis.com" + city_name: "Your City" + timeout: 120 + token: "${ARCGIS_TOKEN}" # Optional: bearer token for private items +``` + +**Tools:** + +| Tool | Description | +|------|-------------| +| `arcgis__search_datasets(q, limit)` | Search the Hub catalog | +| `arcgis__get_dataset(dataset_id)` | Get metadata for a Hub item (32-char hex ID) | +| `arcgis__get_aggregations(field, q)` | Facet counts for type, tags, categories, or access | +| `arcgis__query_data(dataset_id, where, out_fields, limit)` | Query a Feature Service | + +### Built-in: Socrata + +For Socrata-based open data portals (e.g., data.cityofchicago.org, data.seattle.gov). + +**Configuration:** + +```yaml +plugins: + socrata: + enabled: true + base_url: "https://data.yourcity.gov" + portal_url: "https://data.yourcity.gov" + city_name: "Your City" + app_token: "${SOCRATA_APP_TOKEN}" # Recommended; register at dev.socrata.com + timeout: 30 +``` + +**Tools:** + +| Tool | Description | +|------|-------------| +| `socrata__search_datasets(query, limit)` | Search the portal catalog | +| `socrata__get_dataset(dataset_id)` | Get metadata for a dataset (4x4 ID) | +| `socrata__get_schema(dataset_id)` | Get column schema for constructing SoQL queries | +| `socrata__query_dataset(dataset_id, soql_query)` | Query data using SoQL | +| `socrata__list_categories()` | List all categories with dataset counts | +| `socrata__execute_sql(dataset_id, soql)` | Execute raw SoQL SELECT (advanced) | ### Custom Plugins @@ -104,7 +175,7 @@ mkdir -p custom_plugins/my_plugin cp custom_plugins/template/plugin_template.py custom_plugins/my_plugin/plugin.py ``` -Edit the plugin, add 
config to `config.yaml` (create from `config-example.yaml` if needed), then `./scripts/deploy.sh`. +Edit the plugin, add config to `config.yaml` (create from `config-example.yaml` if needed), then `opencontext deploy --env staging`. **Structure:** - Inherit from `MCPPlugin` (or `DataPlugin` for data sources) @@ -151,16 +222,18 @@ class MCPPlugin(ABC): | Endpoint | Auth | Use | |----------|------|-----| -| API Gateway | Rate limit, quota | Production | -| Lambda Function URL | None | Testing | +| API Gateway | Throttling + usage plan quota | Production | +| Lambda Function URL | None | Testing only | ## Configuration -Single `config.yaml`; passed to Lambda via `OPENCONTEXT_CONFIG`. Validated at deploy and runtime. +Single `config.yaml`; JSON-encoded and injected as the `OPENCONTEXT_CONFIG` Lambda environment variable at deploy time. Validated at deploy and runtime. ## Security & Scalability -- **API Gateway:** Rate limiting (100 burst, 50 sustained/s), configurable daily quota -- **Lambda URL:** Public—testing only +- **API Gateway:** Configurable throttling (default: 10 burst / 5 sustained req/s) and daily quota via `api_quota_limit`, `api_burst_limit`, `api_rate_limit` Terraform variables +- **Lambda URL:** Public — testing only; use API Gateway for production +- **X-Ray:** Active tracing on all Lambda invocations and API Gateway stage +- **SQS DLQ:** Failed async invocations written to `-dlq` for inspection - **Stateless:** No shared state; Lambda auto-scales -- **Logging:** CloudWatch, structured JSON, request IDs +- **Logging:** CloudWatch Logs, structured JSON, 14-day retention diff --git a/docs/BUILT_IN_PLUGINS.md b/docs/BUILT_IN_PLUGINS.md index 7931c72..a151e9f 100644 --- a/docs/BUILT_IN_PLUGINS.md +++ b/docs/BUILT_IN_PLUGINS.md @@ -1,6 +1,6 @@ # Built-in Plugins Reference -OpenContext includes built-in plugins for CKAN and Socrata open data portals. +OpenContext includes built-in plugins for CKAN, ArcGIS Hub, and Socrata open data portals. 
## CKAN Plugin @@ -21,29 +21,20 @@ plugins: ### Tools -- `ckan__search_datasets(query, limit)` - Search for datasets -- `ckan__get_dataset(dataset_id)` - Get dataset metadata -- `ckan__query_data(resource_id, filters, limit)` - Query data from a resource -- `ckan__get_schema(resource_id)` - Get schema for a resource +| Tool | Description | +|------|-------------| +| `ckan__search_datasets(query, limit)` | Search for datasets | +| `ckan__get_dataset(dataset_id)` | Get dataset metadata | +| `ckan__query_data(resource_id, filters, limit)` | Query data from a resource | +| `ckan__get_schema(resource_id)` | Get schema for a resource | +| `ckan__execute_sql(sql)` | Execute PostgreSQL SELECT queries (advanced) | +| `ckan__aggregate_data(resource_id, metrics, group_by, filters, having, order_by, limit)` | Aggregate data with GROUP BY — supports `count(*)`, `sum()`, `avg()`, `min()`, `max()`, `stddev()` | -### Examples +### SQL Execution -**Search datasets:** -``` -Search for datasets about housing -``` - -**Get dataset:** -``` -Get details about the "311 Service Requests" dataset -``` +The `execute_sql` tool allows complex PostgreSQL queries (CTEs, window functions, joins). Only SELECT is allowed — INSERT, UPDATE, DELETE, DROP, and other destructive operations are blocked. Resource IDs must be valid UUIDs in double quotes: `FROM "uuid-here"`. -**Query data:** -``` -Query the first 10 records from resource abc123 -``` - -## CKAN API +### CKAN API This plugin uses CKAN's Action API: - `/api/3/action/package_search` - Search datasets @@ -52,11 +43,52 @@ This plugin uses CKAN's Action API: See [CKAN API documentation](https://docs.ckan.org/en/latest/api/) for details. +--- + +## ArcGIS Hub Plugin + +For ArcGIS Hub open data portals (e.g., hub.arcgis.com, data-yourcity.hub.arcgis.com). 
+ +### Configuration + +```yaml +plugins: + arcgis: + enabled: true + portal_url: "https://hub.arcgis.com" # ArcGIS Hub portal URL + city_name: "Your City" # City/organization name + timeout: 120 # HTTP timeout in seconds + token: "${ARCGIS_TOKEN}" # Optional: bearer token for private items +``` + +### Tools + +| Tool | Description | +|------|-------------| +| `arcgis__search_datasets(q, limit)` | Search the Hub catalog | +| `arcgis__get_dataset(dataset_id)` | Get metadata for a Hub item (32-char hex ID) | +| `arcgis__get_aggregations(field, q)` | Facet counts for type, tags, categories, or access | +| `arcgis__query_data(dataset_id, where, out_fields, limit)` | Query a Feature Service | + +### Usage Notes + +- `get_dataset` returns the Hub item metadata. Check that the item has a queryable `serviceUrl` before calling `query_data`. +- `get_aggregations` accepts `field` values: `"type"`, `"tags"`, `"categories"`, `"access"`. +- `query_data` uses the ArcGIS Feature Service query interface. The `where` parameter is a SQL WHERE clause (e.g., `"population > 10000"`). Only Feature Layer, Feature Service, Map Service, and Table types are queryable. + +### ArcGIS API + +This plugin uses two API layers: +- **Hub Search API** (OGC API - Records) — catalog search and aggregations +- **ArcGIS Feature Service** query endpoint — data queries + +--- + ## Socrata Plugin For Socrata-based open data portals (e.g., data.cityofchicago.org, data.cityofnewyork.us, data.seattle.gov). -**Note:** Socrata requires a free app token. Register at [https://dev.socrata.com/register](https://dev.socrata.com/register). +**Note:** A Socrata app token is **required**. Register for a free token at [https://dev.socrata.com/register](https://dev.socrata.com/register). 
### Configuration @@ -68,57 +100,47 @@ plugins: portal_url: "https://data.yourcity.gov" city_name: "Your City" app_token: "${SOCRATA_APP_TOKEN}" # Required - timeout: 30.0 # HTTP timeout (default: 30) + timeout: 30 # HTTP timeout in seconds (default: 30) ``` ### Tools -- `socrata__search_datasets(query, limit)` - Search for datasets in the portal catalog -- `socrata__get_dataset(dataset_id)` - Get full metadata for a dataset (4x4 ID) -- `socrata__get_schema(dataset_id)` - Get column schema for constructing SoQL queries -- `socrata__query_dataset(dataset_id, soql_query)` - Query data using SoQL -- `socrata__execute_sql(dataset_id, soql)` - Execute raw SoQL query (advanced, similar to CKAN execute_sql) -- `socrata__list_categories()` - List all categories with dataset counts - -### Examples - -**Search datasets:** -``` -Search for datasets about housing -``` +| Tool | Description | +|------|-------------| +| `socrata__search_datasets(query, limit)` | Search for datasets in the portal catalog | +| `socrata__get_dataset(dataset_id)` | Get full metadata for a dataset (4x4 ID) | +| `socrata__get_schema(dataset_id)` | Get column schema for constructing SoQL queries | +| `socrata__query_dataset(dataset_id, soql_query)` | Query data using SoQL | +| `socrata__list_categories()` | List all categories with dataset counts | +| `socrata__execute_sql(dataset_id, soql)` | Execute raw SoQL SELECT (advanced) | -**Get dataset:** -``` -Get details about dataset wc4w-4jew -``` +### Typical Workflow -**Get schema (call before query_dataset):** ``` -Get schema for dataset wc4w-4jew +list_categories → search_datasets → get_dataset → get_schema → query_dataset ``` -**Query data:** -``` -Query dataset wc4w-4jew with: SELECT * WHERE year > 2020 LIMIT 50 -``` +### SoQL Notes -**List categories:** -``` -List all dataset categories on the open data portal -``` +- `GROUP BY` is required whenever `COUNT()` or another aggregate is selected alongside non-aggregated columns; every non-aggregated column in the `SELECT` must appear in `GROUP BY`. +- Boolean fields use `= true` / `= false`, not `= 'Y'` or `= 1`. 
+- For conditional counts: `SUM(CASE WHEN col = true THEN 1 ELSE 0 END)`. +- `LIMIT` caps returned rows and can affect aggregation results. ### Socrata API This plugin uses two Socrata API layers: -- **Discovery API** (api.us.socrata.com) - Catalog search, categories -- **SODA3** (portal domain) - Dataset metadata, schema, data queries +- **Discovery API** (api.us.socrata.com) — catalog search, categories +- **SODA3** (portal domain) — dataset metadata, schema, data queries See [Socrata developer documentation](https://dev.socrata.com/) for details. +--- + ## Custom Plugins -If your portal doesn't use CKAN, you can create a custom plugin. See [Custom Plugins Guide](CUSTOM_PLUGINS.md) for instructions. +If your portal doesn't use CKAN, ArcGIS Hub, or Socrata, you can create a custom plugin. See [Custom Plugins Guide](CUSTOM_PLUGINS.md) for instructions. ## Examples -See [examples/](../examples/) for complete configuration examples. +See [examples/](../examples/) for complete configuration examples per city. diff --git a/docs/CLI.md b/docs/CLI.md new file mode 100644 index 0000000..704fda1 --- /dev/null +++ b/docs/CLI.md @@ -0,0 +1,314 @@ +# CLI Reference + +The `opencontext` CLI manages the full lifecycle of an OpenContext MCP server — from initial setup through deployment, monitoring, and teardown. + +## Installation + +```bash +git clone https://github.com/thealphacubicle/OpenContext.git +cd OpenContext +pip install -e ".[cli]" +``` + +Verify: + +```bash +opencontext --help +``` + +## Global behavior + +- Commands that modify infrastructure (`deploy`, `destroy`) require a TTY and prompt for confirmation. +- All commands that interact with AWS or Terraform respect the environment set by `--env`. +- `--env` defaults to `staging` on every command that accepts it. + +--- + +## Commands + +### `opencontext authenticate` + +Check all prerequisites and print a status table. Auto-installs `uv` and `awscli` if missing. + +**Checks:** +1. Python >= 3.11 +2. 
`uv` (auto-installs via pip if missing) +3. AWS CLI (auto-installs via uv/pip if missing) +4. AWS credentials (`aws sts get-caller-identity`) +5. Terraform >= 1.0 + +```bash +opencontext authenticate +``` + +--- + +### `opencontext configure` + +Interactive wizard that creates `config.yaml`, the Terraform `.tfvars` file, and initializes the Terraform workspace. + +**Prompts:** +- Starting point (example template or scratch) +- Organization name and city +- Environment (`staging` or `prod`) +- Plugin (CKAN, ArcGIS, or Socrata) and connection settings +- AWS region, Lambda name, memory (MB), and timeout (seconds) +- Optional custom domain + +**Outputs:** +- `config.yaml` — plugin and Lambda settings +- `terraform/aws/.tfvars` — Terraform variables +- Terraform workspace `-` (created or selected) + +```bash +opencontext configure +``` + +--- + +### `opencontext serve` + +Start a local development server for testing without deploying to AWS. The server uses the same MCP handler as the Lambda adapter, so behavior is identical to production. + +```bash +opencontext serve +opencontext serve --port 9000 +opencontext serve --config path/to/config.yaml +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--port` | `8000` | Port to listen on | +| `--config` | `config.yaml` | Path to config file | + +The server starts at `http://localhost:<port>/mcp` (port `8000` unless `--port` is given). Use it with Claude Desktop (via Claude Connectors), `opencontext test --url`, or any HTTP client. Logs are written to stdout at the level set in `config.yaml`. + +--- + +### `opencontext deploy` + +Package the Lambda deployment zip, run `terraform plan`, show a summary, prompt for confirmation, then apply. + +```bash +opencontext deploy --env staging +opencontext deploy --env prod +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to deploy to | + +**What it does:** +1. Runs all validation checks (same as `opencontext validate`) +2. 
Installs Python dependencies into `.deploy/` using `uv pip install` +3. Copies `core/`, `plugins/`, `server/`, and `custom_plugins/` into the zip +4. Copies the zip and `config.yaml` into `terraform/aws/` +5. Runs `terraform plan` and shows add/change/destroy counts +6. Prompts for confirmation (defaults to No) +7. Runs `terraform apply` +8. Prints API Gateway URL, CloudWatch log group, and custom domain details + +After deployment, the API Gateway URL includes `/mcp` and is ready to use with Claude Connectors. + +--- + +### `opencontext status` + +Show deployment status for an environment: Lambda info, API Gateway URL, custom domain, and certificate status. + +```bash +opencontext status --env staging +opencontext status --env prod +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to query | + +--- + +### `opencontext validate` + +Run pre-deployment validation checks without deploying. Useful for CI or before a deploy. + +```bash +opencontext validate --env staging +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to validate against | + +**Checks:** +1. `config.yaml` exists +2. Exactly one plugin enabled +3. Plugin required fields present +4. `terraform/aws/.tfvars` exists +5. Terraform installed +6. Terraform initialized (`.terraform/` directory present) +7. `terraform validate` passes +8. AWS credentials valid +9. ACM certificate exists (only if `custom_domain` is set in tfvars) + +Exits with code 1 if any check fails. + +--- + +### `opencontext test` + +Send MCP JSON-RPC requests to the deployed server and report results. 
+ +```bash +opencontext test --env staging +opencontext test --url https://my-lambda-url.lambda-url.us-east-1.on.aws +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to test (fetches URL from Terraform output) | +| `--url` | — | Override URL to test against (skips Terraform lookup) | + +**Tests run:** +1. Ping +2. Initialize (MCP protocol handshake) +3. List tools +4. Call first available tool with empty arguments + +If a custom domain is configured and its certificate is `ISSUED`, the command also tests against the custom domain URL. + +--- + +### `opencontext logs` + +Tail CloudWatch logs for the deployed Lambda. + +```bash +opencontext logs --env staging +opencontext logs --env staging --follow +opencontext logs --env staging --verbose +opencontext logs --env staging --since 30m +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to fetch logs for | +| `--follow`, `-f` | False | Stream new log entries as they arrive | +| `--verbose`, `-v` | False | Show structured per-invocation view with duration and error highlighting | +| `--since` | `1h` | How far back to fetch (e.g., `30m`, `2h`, `24h`) | + +Without `--verbose`, log lines are printed with START entries highlighted in cyan and ERROR lines highlighted in red. With `--verbose`, invocations are grouped with request ID, duration, and status. + +--- + +### `opencontext domain` + +Check and manage custom domain setup. Shows certificate status, DNS records to create, and (if the certificate is issued) tests the domain is live. + +```bash +opencontext domain --env staging +opencontext domain --env prod +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to check | + +When the certificate is `PENDING_VALIDATION`, this command prints the two CNAME records that city IT needs to create, plus a pre-filled email template to send them. 
+ +--- + +### `opencontext architecture` + +Print a human-readable overview of the AWS infrastructure in the terminal — request flow, API Gateway settings, Lambda config, supporting services (CloudWatch, SQS DLQ, Terraform state), custom domain, and resource tagging. + +```bash +opencontext architecture +``` + +--- + +### `opencontext plugin list` + +List all built-in and custom plugins and their enabled/disabled status from `config.yaml`. + +```bash +opencontext plugin list +``` + +--- + +### `opencontext security` + +Run a `pip-audit` vulnerability scan against installed packages and display results grouped by severity (CRITICAL, HIGH, MEDIUM, LOW). Exits with code 1 if any vulnerabilities are found. + +```bash +opencontext security +opencontext security --export +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--export` | False | Write report to `security-report-.txt` | + +Requires `pip-audit` to be installed as a dev dependency (`uv add --dev pip-audit`). + +--- + +### `opencontext cost` + +Estimate AWS costs for an environment based on CloudWatch metrics. + +```bash +opencontext cost --env staging +opencontext cost --env prod --days 7 +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to estimate costs for | +| `--days` | `30` | Number of days to look back | + +Reports Lambda invocations, average duration, API Gateway request count, and estimated costs. Uses AWS public pricing — check AWS Cost Explorer for exact figures. + +To break costs down by tag in Cost Explorer, activate the `Project`, `Environment`, and `ManagedBy` tags in AWS Console → Billing → Cost allocation tags. + +--- + +### `opencontext upgrade` + +Merge updates from the upstream OpenContext template into your fork. 
+ +```bash +opencontext upgrade +opencontext upgrade --upstream-url https://github.com/thealphacubicle/OpenContext.git +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--upstream-url` | upstream repo URL | URL of the upstream template repository | + +**What it does:** +1. Adds an `upstream` git remote if not present +2. Fetches `upstream/main` +3. Shows new commits and affected files +4. Warns about protected files (`config.yaml`, `terraform/aws/*.tfvars`, `examples/`) that will not be overwritten +5. Prompts for confirmation, then runs `git merge upstream/main --no-commit --no-ff` +6. Auto-resolves conflicts in protected files by keeping your version +7. Leaves the merge staged — run `git commit` to finalize or `git merge --abort` to cancel + +--- + +### `opencontext destroy` + +Tear down all AWS resources for an environment. Requires typing the environment name to confirm. + +```bash +opencontext destroy --env staging +opencontext destroy --env prod +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--env` | `staging` | Environment to destroy | + +Runs `terraform destroy -auto-approve` after confirmation. This is irreversible — all Lambda, API Gateway, IAM, and CloudWatch resources for the workspace are removed. diff --git a/docs/CUSTOM_PLUGINS.md b/docs/CUSTOM_PLUGINS.md index 61ff09c..0851707 100644 --- a/docs/CUSTOM_PLUGINS.md +++ b/docs/CUSTOM_PLUGINS.md @@ -27,7 +27,7 @@ Custom plugins allow you to integrate OpenContext with your own APIs, databases, api_key: "${MY_API_KEY}" ``` -4. Deploy: `./scripts/deploy.sh` +4. Deploy: `opencontext deploy --env staging` ## Plugin Structure @@ -134,25 +134,88 @@ async def health_check(self) -> bool: ## DataPlugin Interface -If your plugin provides data operations, inherit from `DataPlugin` instead: +If your plugin provides data operations, inherit from `DataPlugin` instead of `MCPPlugin` directly. 
+ +`DataPlugin` extends `MCPPlugin`, so a `DataPlugin` subclass must implement **all 8 abstract methods** — the 5 from `MCPPlugin` plus the 3 defined on `DataPlugin` itself. Omitting any of these will raise a `TypeError` at startup. + +### The 5 required methods inherited from `MCPPlugin` + +These are the same methods documented in the [Required Methods](#required-methods) section above. `DataPlugin` does not override or relax any of them: + +| Method | Signature | +|---|---| +| `initialize` | `async def initialize(self) -> bool` | +| `shutdown` | `async def shutdown(self) -> None` | +| `get_tools` | `def get_tools(self) -> List[ToolDefinition]` | +| `execute_tool` | `async def execute_tool(self, tool_name: str, arguments: Dict[str, Any]) -> ToolResult` | +| `health_check` | `async def health_check(self) -> bool` | + +### The 3 additional methods defined by `DataPlugin` + +| Method | Signature | +|---|---| +| `search_datasets` | `async def search_datasets(self, query: str, limit: int = 20) -> List[Dict[str, Any]]` | +| `get_dataset` | `async def get_dataset(self, dataset_id: str) -> Dict[str, Any]` | +| `query_data` | `async def query_data(self, resource_id: str, filters: Optional[Dict[str, Any]] = None, limit: int = 100) -> List[Dict[str, Any]]` | + +### Minimal skeleton ```python -from core.interfaces import DataPlugin +from typing import Any, Dict, List, Optional +from core.interfaces import DataPlugin, PluginType, ToolDefinition, ToolResult class MyDataPlugin(DataPlugin): - async def search_datasets(self, query: str, limit: int = 20): - # Implement dataset search + plugin_name = "my_data" + plugin_type = PluginType.OPEN_DATA + plugin_version = "1.0.0" + + def __init__(self, config: Dict[str, Any]) -> None: + super().__init__(config) + + # --- 5 required methods from MCPPlugin --- + + async def initialize(self) -> bool: + # Create clients, validate config, set self._initialized = True + self._initialized = True + return True + + async def shutdown(self) -> None: 
+ # Close clients, release resources + self._initialized = False + + def get_tools(self) -> List[ToolDefinition]: + # Return ToolDefinition objects for each tool this plugin exposes + return [] + + async def execute_tool(self, tool_name: str, arguments: Dict[str, Any]) -> ToolResult: + # Dispatch to the correct tool implementation + return ToolResult(content=[], success=False, error_message=f"Unknown tool: {tool_name}") + + async def health_check(self) -> bool: + return self._initialized + + # --- 3 required methods from DataPlugin --- + + async def search_datasets(self, query: str, limit: int = 20) -> List[Dict[str, Any]]: + # Return a list of dataset metadata dicts matching the query pass - async def get_dataset(self, dataset_id: str): - # Implement dataset retrieval + async def get_dataset(self, dataset_id: str) -> Dict[str, Any]: + # Return full metadata for a single dataset pass - async def query_data(self, resource_id: str, filters: Optional[Dict] = None, limit: int = 100): - # Implement data querying + async def query_data( + self, + resource_id: str, + filters: Optional[Dict[str, Any]] = None, + limit: int = 100, + ) -> List[Dict[str, Any]]: + # Return records from the specified resource pass ``` +See `custom_plugins/template/plugin_template.py` for the canonical, fully-annotated starting point (it inherits `MCPPlugin` directly, which is fine for non-data plugins). + ## Best Practices ### Error Handling diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 761b68c..b073c32 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -15,7 +15,10 @@ Run `opencontext authenticate` to verify all prerequisites before deploying. 
- Lambda (create, update functions) - IAM (roles, policies) - CloudWatch Logs -- API Gateway / Lambda Function URLs +- API Gateway +- SQS (Dead Letter Queue for Lambda failures) +- X-Ray (tracing, via AWSXRayDaemonWriteAccess) +- ACM (only required when configuring a custom domain) ## Deployment @@ -61,12 +64,13 @@ The `opencontext configure` command generates the `.tfvars` file. For manual run ## Endpoints +All traffic — development, staging, and production — goes through the API Gateway URL. There is no separate no-auth endpoint. + | Endpoint | Use Case | Auth | |----------|----------|------| -| **API Gateway** | Production | Rate limiting, daily quota | -| **Lambda Function URL** | Testing | None | +| **API Gateway** | All environments | Rate limiting, daily quota | -### Get URLs +### Get the URL ```bash # Via CLI @@ -74,17 +78,15 @@ opencontext status --env staging # Via Terraform directly cd terraform/aws -terraform output -raw api_gateway_url # Production (includes /mcp) -terraform output -raw lambda_url # Testing +terraform output -raw api_gateway_url # Includes /mcp suffix ``` ### API Gateway -- **Rate limit:** 100 burst, 50 sustained req/s (configurable via Terraform variables) -- **Daily quota:** 1000 requests/day (configurable via `api_quota_limit`) +- **Throttling:** Default 10 burst / 5 sustained req/s; configurable via `api_burst_limit` and `api_rate_limit` Terraform variables +- **Daily quota:** Configurable via `api_quota_limit` Terraform variable - **Stage name:** Default is `staging`; URL format: `https://...execute-api.region.amazonaws.com/staging/mcp` -- **429** when exceeded -- Use for production; Lambda URL has no auth +- **HTTP 429** when rate or quota is exceeded ## Configuration @@ -121,7 +123,7 @@ This runs `terraform destroy` with a confirmation prompt. 
You must type the envi ## Cost (us-east-1) - Lambda: ~$0.20/1M requests, ~$0.0000166667/GB-second -- Function URL: Free +- API Gateway: ~$3.50/1M requests - Example: 100K req/month, 512 MB, 1s avg ≈ **$1/month** ## Troubleshooting @@ -136,6 +138,5 @@ This runs `terraform destroy` with a confirmation prompt. You must type the envi ## Security -- Use API Gateway for production (rate limiting, quota) -- Lambda URL is public — testing only +- API Gateway enforces rate limiting and daily quotas for all environments - Store secrets in env vars, not code diff --git a/docs/FAQ.md b/docs/FAQ.md index 2f5a1a4..2d1491f 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -36,7 +36,7 @@ Yes, but they must be resolved before deployment. Terraform will set the final c ### What if I need to change configuration after deployment? -Edit `config.yaml` and run `./scripts/deploy.sh` again. Terraform will update the Lambda environment variable. +Edit `config.yaml` and run `opencontext deploy --env staging` again. Terraform will update the Lambda environment variable. ## Plugins @@ -44,7 +44,9 @@ Edit `config.yaml` and run `./scripts/deploy.sh` again. Terraform will update th Built-in plugins: -- **CKAN** - For CKAN-based portals (e.g., data.gov, data.gov.uk) +- **CKAN** — For CKAN-based portals (e.g., data.gov, data.gov.uk) +- **ArcGIS Hub** — For ArcGIS Hub portals (e.g., hub.arcgis.com) +- **Socrata** — For Socrata portals (e.g., data.cityofchicago.org) You can also create custom plugins in `custom_plugins/`. @@ -75,7 +77,7 @@ The current implementation is AWS-specific. Contributions for other providers ar ### How do I update an existing deployment? -Run `./scripts/deploy.sh` again. Terraform will update the Lambda function. +Run `opencontext deploy --env staging` again. Terraform will update the Lambda function. 
## Usage @@ -139,14 +141,11 @@ curl -X POST https://your-lambda-url \ **Option 1: Use the local server** ```bash -# Install aiohttp if needed -pip install aiohttp - # Start local server -python3 local_server.py +opencontext serve # In another terminal, test with curl -curl -X POST http://localhost:8000 \ +curl -X POST http://localhost:8000/mcp \ -H "Content-Type: application/json" \ -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' ``` diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 3eec024..a6e6318 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -77,8 +77,7 @@ Each deployment connects to one data source. To connect another source, deploy a ### 2. Start the Local Server ```bash -pip install aiohttp -python3 scripts/local_server.py +opencontext serve ``` The server runs at `http://localhost:8000/mcp`. Keep this terminal open. @@ -161,7 +160,7 @@ cd terraform/aws terraform output -raw api_gateway_url ``` -The output already includes `/mcp`. Use the API Gateway URL for production (rate limiting, API key). For testing without auth, use the Lambda URL from `terraform output -raw lambda_url` instead. +The output already includes `/mcp`. Use the API Gateway URL for all testing and production traffic. ### 4. Updating @@ -175,15 +174,24 @@ opencontext deploy --env staging ## CLI Reference +See the [CLI Reference](CLI.md) for full flag documentation. 
+ | Command | Description | |---------|-------------| | `opencontext authenticate` | Check prerequisites (Python, uv, AWS CLI, credentials, Terraform) | | `opencontext configure` | Interactive wizard: creates `config.yaml`, `.tfvars`, and Terraform workspace | +| `opencontext serve` | Start local dev server at `http://localhost:8000/mcp` (no AWS required) | | `opencontext deploy --env <env>` | Package Lambda, plan changes, confirm, and deploy | | `opencontext status --env <env>` | Show deployment status, URLs, and cert status | | `opencontext validate --env <env>` | Run pre-deployment checks without deploying | | `opencontext test --env <env>` | Test the deployed MCP server endpoints | -| `opencontext logs --env <env>` | Tail CloudWatch logs (`--follow` to stream) | +| `opencontext logs --env <env>` | Tail CloudWatch logs (`--follow` to stream, `--verbose` for structured view) | +| `opencontext domain --env <env>` | Check custom domain and certificate status | +| `opencontext architecture` | Show AWS architecture diagram in the terminal | +| `opencontext plugin list` | List all plugins and their enabled/disabled status | +| `opencontext security` | Run a pip-audit vulnerability scan (`--export` to save report) | +| `opencontext cost --env <env>` | Estimate AWS costs from CloudWatch metrics (`--days` to adjust window) | +| `opencontext upgrade` | Merge updates from the upstream OpenContext template | | `opencontext destroy --env <env>` | Tear down all deployed resources | --- @@ -192,7 +200,6 @@ opencontext deploy --env staging | Issue | Solution | |-------|----------| -| `ModuleNotFoundError: aiohttp` | `pip install aiohttp` | | "Multiple Plugins Enabled" | Enable only one plugin in `config.yaml` | | Claude can't connect | Verify URL includes `/mcp`, check connector is enabled in the chat | | Lambda 500 error | Check CloudWatch logs: `opencontext logs --env staging` | @@ -203,9 +210,11 @@ opencontext deploy --env staging ## Next Steps -- [Architecture](ARCHITECTURE.md) – System design, built-in plugins, 
custom plugins -- [Deployment](DEPLOYMENT.md) – AWS details, monitoring, cost -- [Testing](TESTING.md) – Local testing (Terminal, Claude, MCP Inspector) +- [CLI Reference](CLI.md) — All commands and flags in detail +- [Architecture](ARCHITECTURE.md) — System design, built-in plugins, custom plugins +- [Built-in Plugins](BUILT_IN_PLUGINS.md) — CKAN, ArcGIS Hub, and Socrata tool reference +- [Deployment](DEPLOYMENT.md) — AWS details, monitoring, cost +- [Testing](TESTING.md) — Local testing (Terminal, Claude, MCP Inspector) --- diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 66e500b..defe983 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -36,7 +36,7 @@ opencontext configure The interactive wizard prompts for: - Organization name and city -- Plugin (CKAN, Socrata, or ArcGIS) and data source URL +- Plugin (CKAN, ArcGIS, or Socrata) and data source URL - AWS region and Lambda settings - Optional custom domain @@ -84,8 +84,7 @@ opencontext status --env staging ```bash # Start local server -pip install aiohttp -python3 scripts/local_server.py +opencontext serve # In another terminal, test with curl curl -X POST http://localhost:8000/mcp \ diff --git a/docs/TESTING.md b/docs/TESTING.md index 61fdeb7..b70acdb 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -7,8 +7,7 @@ This guide covers three ways to test your OpenContext server locally. Before testing: 1. Create `config.yaml` from `config-example.yaml` and enable exactly one plugin -2. Install dependencies: `pip install aiohttp` -3. Start the server: `python3 scripts/local_server.py` +2. Start the server: `opencontext serve` The server runs at `http://localhost:8000/mcp`. Keep it running while you test. 
@@ -45,7 +44,7 @@ curl -X POST http://localhost:8000/mcp \ For a full test (initialize, list tools, call tool), run: ```bash -./scripts/test_streamable_http.sh +opencontext test --url http://localhost:8000/mcp ``` --- @@ -120,11 +119,11 @@ pytest --cov=core --cov=plugins ## Testing Against Production -To test a deployed server, use the Lambda URL or API Gateway URL: +To test a deployed server, use the API Gateway URL: ```bash -LAMBDA_URL="https://your-lambda-url.lambda-url.us-east-1.on.aws" -curl -X POST $LAMBDA_URL/mcp \ +API_GW_URL=$(cd terraform/aws && terraform output -raw api_gateway_url) +curl -X POST $API_GW_URL \ -H "Content-Type: application/json" \ -d '{"jsonrpc":"2.0","id":1,"method":"ping"}' ``` diff --git a/local_server.py b/local_server.py deleted file mode 100644 index 08c52fb..0000000 --- a/local_server.py +++ /dev/null @@ -1,99 +0,0 @@ -# run_local_server.py -"""Run OpenContext MCP server locally for testing (no Lambda needed).""" - -import asyncio -import json - -import yaml -from aiohttp import web - -from core.plugin_manager import PluginManager -from core.mcp_server import MCPServer - -# Load config -with open("config.yaml") as f: - config = yaml.safe_load(f) - -# Global server instance -_plugin_manager = None -_mcp_server = None - - -async def init_server(): - """Initialize server on startup.""" - global _plugin_manager, _mcp_server - - print("🚀 Initializing OpenContext MCP Server locally...") - - # Initialize Plugin Manager - _plugin_manager = PluginManager(config) - await _plugin_manager.load_plugins() - - # Initialize MCP Server - _mcp_server = MCPServer(_plugin_manager) - - print("✅ Server initialized successfully") - print(f"Loaded plugins: {list(_plugin_manager.plugins.keys())}") - print(f"Available tools: {len(_plugin_manager.get_all_tools())}") - - -async def handle_mcp_request(request): - """Handle MCP JSON-RPC request.""" - try: - body = await request.text() - headers = dict(request.headers) - - # Use the same handler as Lambda - 
response = await _mcp_server.handle_http_request(body, headers) - - return web.Response( - text=response.get("body", "{}"), - status=response.get("statusCode", 200), - headers=response.get("headers", {}), - ) - - except Exception as e: - return web.Response( - text=json.dumps( - { - "jsonrpc": "2.0", - "id": None, - "error": {"code": -32603, "message": str(e)}, - } - ), - status=500, - headers={"Content-Type": "application/json"}, - ) - - -async def start_server(): - """Start local HTTP server.""" - await init_server() - - app = web.Application() - app.router.add_post("/", handle_mcp_request) - - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, "localhost", 8000) - await site.start() - - print("\n" + "=" * 50) - print("🌐 Local MCP Server running!") - print("=" * 50) - print("URL: http://localhost:8000") - print("\nTest with:") - print(" opencontext-client http://localhost:8000") - print("\nPress Ctrl+C to stop") - print("=" * 50 + "\n") - - # Keep running - try: - await asyncio.Event().wait() - except KeyboardInterrupt: - print("\n👋 Shutting down...") - await _plugin_manager.shutdown() - - -if __name__ == "__main__": - asyncio.run(start_server()) diff --git a/pyproject.toml b/pyproject.toml index 98b9578..f3b659a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,9 @@ dependencies = [ "pygments>=2.20.0", "requests>=2.33.0", "pre-commit>=4.5.1", + "boto3>=1.42.83", + "botocore>=1.42.83", + "click>=8.3.1", ] [project.optional-dependencies] diff --git a/requirements-dev.txt b/requirements-dev.txt index c793a78..9dbcb9b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,6 +6,20 @@ # Testing pytest>=7.0.0 pytest-asyncio>=0.21.0 +pytest-cov>=4.0.0 +pytest-xdist>=3.0.0 +click>=8.3.1 + +# Linting / auditing +ruff>=0.1.0 +pip-audit>=2.0.0 + +# CLI +typer>=0.9.0 +questionary>=2.0.0 +rich>=13.0.0 +boto3>=1.42.83 +botocore>=1.42.83 # Pre-commit hooks (run: pre-commit install) pre-commit>=4.0.0 diff --git 
a/requirements.txt b/requirements.txt index d6ea9c1..d93cf9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ sqlparse>=0.4.4 aiohttp>=3.13.4 pygments>=2.20.0 requests>=2.33.0 +pre-commit>=4.5.1 diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 6c63436..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Scripts Directory - -This directory contains utility scripts for OpenContext MCP server. - -## Scripts - -### `deploy.sh` - -Deployment script that validates configuration and deploys the MCP server to AWS Lambda. - -**Usage:** -```bash -./scripts/deploy.sh --environment [--tfworkspace ] -``` - -**Options:** - -| Flag | Short | Required | Description | -|------|-------|----------|-------------| -| `--environment` | `-e` | Yes | `staging` or `prod` | -| `--tfworkspace` | `-w` | No | Terraform workspace name (default: `opencontext-staging` or `opencontext-prod`) | -| `--help` | `-h` | No | Show help | - -**Examples:** -```bash -./scripts/deploy.sh --environment staging -./scripts/deploy.sh -e prod -./scripts/deploy.sh --environment staging --tfworkspace my-workspace -./scripts/deploy.sh -e prod -w my-org-prod -``` - -**What it does:** -- Validates that exactly ONE plugin is enabled -- Packages the code for Lambda deployment -- Selects (or creates) the specified Terraform workspace -- Deploys to AWS using Terraform -- Outputs the API Gateway URL and Lambda Function URL - -**Requirements:** -- Python 3.11+ -- AWS CLI configured -- Terraform installed -- Valid `config.yaml` in project root (create from `config-example.yaml`) - -### `local_server.py` - -Local development server for testing the MCP server without deploying to Lambda. 
- -**Usage:** -```bash -python3 scripts/local_server.py -``` - -**What it does:** -- Starts a local HTTP server on `http://localhost:8000/mcp` -- Supports Streamable HTTP transport with session management -- Provides detailed logging for debugging -- Uses the same MCP server logic as Lambda deployment - -**Requirements:** -- Python 3.11+ -- `aiohttp` package (`pip install aiohttp`) -- Valid `config.yaml` in project root (create from `config-example.yaml`) - -**Testing:** -```bash -# Test with curl -curl -X POST http://localhost:8000/mcp \ - -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","id":1,"method":"ping"}' - -# Or use the test script -./scripts/test_streamable_http.sh -``` - -### `test_streamable_http.sh` - -Test script for Streamable HTTP transport. Tests the full MCP lifecycle. - -**Usage:** -```bash -./scripts/test_streamable_http.sh [BASE_URL] -``` - -**Default:** `http://localhost:8000/mcp` - -**What it tests:** -1. Initialize connection and extract session ID -2. List available tools -3. Call a tool (`ckan__search_datasets`) - -**Requirements:** -- `jq` installed (`brew install jq` on macOS) -- MCP server running (local or deployed) - -**Example:** -```bash -# Test local server -./scripts/test_streamable_http.sh - -# Test deployed Lambda -./scripts/test_streamable_http.sh https://your-lambda-url.lambda-url.us-east-1.on.aws/mcp -``` - -## Notes - -- All scripts should be run from the project root directory -- Scripts automatically handle path resolution relative to their location -- Make sure scripts are executable: `chmod +x scripts/*.sh` diff --git a/scripts/deploy.sh b/scripts/deploy.sh deleted file mode 100755 index d21a3c5..0000000 --- a/scripts/deploy.sh +++ /dev/null @@ -1,353 +0,0 @@ -#!/bin/bash -# OpenContext Deployment Script -# -# This script validates configuration and deploys the MCP server to AWS Lambda. -# It enforces the "one fork = one MCP server" rule. 
- -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Script directory (scripts/) -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Project root (parent directory) -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -cd "$PROJECT_ROOT" - -# Parse named arguments -ENVIRONMENT="" -TF_WORKSPACE="" - -show_usage() { - echo "Usage: $0 --environment [--tfworkspace ]" - echo "" - echo "Options:" - echo " --environment, -e Deployment environment: staging or prod (required)" - echo " --tfworkspace, -w Terraform workspace name (default: opencontext-staging or opencontext-prod)" - echo " --help, -h Show this help message" -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --environment|-e) - ENVIRONMENT="$2" - shift 2 - ;; - --tfworkspace|-w) - TF_WORKSPACE="$2" - shift 2 - ;; - --help|-h) - show_usage - exit 0 - ;; - *) - echo -e "${RED}❌ Error: Unknown argument '${1}'${NC}" - show_usage - exit 1 - ;; - esac -done - -if [ -z "$ENVIRONMENT" ]; then - echo -e "${RED}❌ Error: --environment is required${NC}" - show_usage - exit 1 -fi - -if [ "$ENVIRONMENT" != "staging" ] && [ "$ENVIRONMENT" != "prod" ]; then - echo -e "${RED}❌ Error: Invalid environment '${ENVIRONMENT}'. Must be 'staging' or 'prod'.${NC}" - show_usage - exit 1 -fi - -# Default workspace per environment when not explicitly provided -if [ -z "$TF_WORKSPACE" ]; then - if [ "$ENVIRONMENT" = "prod" ]; then - TF_WORKSPACE="opencontext-prod" - else - TF_WORKSPACE="opencontext-staging" - fi -fi - -echo -e "${GREEN}🚀 OpenContext Deployment [${ENVIRONMENT}] (workspace: ${TF_WORKSPACE})${NC}" -echo "================================" -echo "" - -# Check if config.yaml exists -if [ ! -f "config.yaml" ]; then - echo -e "${RED}❌ Error: config.yaml not found${NC}" - echo "Create config from template: cp config-example.yaml config.yaml" - echo "Then edit config.yaml and enable exactly ONE plugin." 
- exit 1 -fi - -# Check if Python is available -if ! command -v python3 &> /dev/null; then - echo -e "${RED}❌ Error: python3 not found${NC}" - echo "Please install Python 3.11 or later." - exit 1 -fi - -# Check if Terraform is available -if ! command -v terraform &> /dev/null; then - echo -e "${RED}❌ Error: terraform not found${NC}" - echo "Please install Terraform: https://www.terraform.io/downloads" - exit 1 -fi - -echo -e "${YELLOW}📋 Step 1: Validating configuration...${NC}" - -# Count enabled plugins using Python for reliable YAML parsing -ENABLED_COUNT=$(python3 << 'EOF' -import yaml -import sys - -try: - with open('config.yaml', 'r') as f: - config = yaml.safe_load(f) - - plugins = config.get('plugins', {}) - enabled = [] - - for plugin_name, plugin_config in plugins.items(): - if isinstance(plugin_config, dict) and plugin_config.get('enabled', False): - enabled.append(plugin_name) - - count = len(enabled) - - if count == 0: - print("0", file=sys.stderr) - print("No plugins enabled", file=sys.stderr) - sys.exit(1) - elif count > 1: - print(str(count), file=sys.stderr) - print(" ".join(enabled), file=sys.stderr) - sys.exit(2) - else: - print(count) - print(enabled[0], file=sys.stderr) - sys.exit(0) - -except Exception as e: - print(f"Error parsing config.yaml: {e}", file=sys.stderr) - sys.exit(3) -EOF -) - -EXIT_CODE=$? - -if [ $EXIT_CODE -eq 1 ]; then - echo -e "${RED}❌ Configuration Error: No Plugins Enabled${NC}" - echo "" - echo "You must enable exactly ONE plugin in config.yaml." - echo "" - echo "To enable a plugin, set 'enabled: true' for:" - echo " • ckan" - echo " • A custom plugin in custom_plugins/" - echo "" - echo "See docs/GETTING_STARTED.md for setup instructions." 
- exit 1 -elif [ $EXIT_CODE -eq 2 ]; then - ENABLED_PLUGINS=$(python3 << 'EOF' -import yaml -with open('config.yaml', 'r') as f: - config = yaml.safe_load(f) -plugins = config.get('plugins', {}) -enabled = [name for name, cfg in plugins.items() - if isinstance(cfg, dict) and cfg.get('enabled', False)] -print("\n".join(f" • {name}" for name in enabled)) -EOF - ) - - echo -e "${RED}❌ Configuration Error: Multiple Plugins Enabled${NC}" - echo "" - echo "You have $ENABLED_COUNT plugins enabled in config.yaml:" - echo "$ENABLED_PLUGINS" - echo "" - echo "OpenContext enforces: One Fork = One MCP Server" - echo "" - echo "This keeps deployments:" - echo " ✓ Simple and focused" - echo " ✓ Independently scalable" - echo " ✓ Easy to maintain" - echo "" - echo "To deploy multiple MCP servers:" - echo "" - echo " 1. Fork this repository again" - echo " Example: opencontext-opendata, opencontext-mbta" - echo "" - echo " 2. Configure ONE plugin per fork" - FIRST_PLUGIN=$(echo "$ENABLED_PLUGINS" | head -n1 | sed 's/ • //') - SECOND_PLUGIN=$(echo "$ENABLED_PLUGINS" | tail -n1 | sed 's/ • //') - echo " Fork #1: Enable $FIRST_PLUGIN only" - echo " Fork #2: Enable $SECOND_PLUGIN only" - echo "" - echo " 3. Deploy each fork separately" - echo " ./scripts/deploy.sh (in each fork)" - echo "" - echo "See docs/ARCHITECTURE.md for details." 
- exit 1 -elif [ $EXIT_CODE -ne 0 ]; then - echo -e "${RED}❌ Error validating configuration${NC}" - exit 1 -fi - -ENABLED_PLUGIN=$(python3 << 'EOF' -import yaml -with open('config.yaml', 'r') as f: - config = yaml.safe_load(f) -plugins = config.get('plugins', {}) -enabled = [name for name, cfg in plugins.items() - if isinstance(cfg, dict) and cfg.get('enabled', False)] -print(enabled[0]) -EOF -) - -echo -e "${GREEN}✓ Configuration valid: ${ENABLED_PLUGIN} plugin enabled${NC}" -echo "" - -# Extract server name and AWS settings -SERVER_NAME=$(python3 << 'EOF' -import yaml -with open('config.yaml', 'r') as f: - config = yaml.safe_load(f) -print(config.get('server_name', 'my-mcp-server')) -EOF -) - -AWS_REGION=$(python3 << 'EOF' -import yaml -with open('config.yaml', 'r') as f: - config = yaml.safe_load(f) -print(config.get('aws', {}).get('region', 'us-east-1')) -EOF -) - -TFVARS_FILE="terraform/aws/${ENVIRONMENT}.tfvars" -LAMBDA_NAME=$(grep '^lambda_name' "$TFVARS_FILE" | sed 's/.*=\s*"\(.*\)"/\1/') - -echo -e "${YELLOW}📦 Step 2: Packaging Lambda code...${NC}" - -# Create deployment package directory -PACKAGE_DIR=".deploy" -rm -rf "$PACKAGE_DIR" -mkdir -p "$PACKAGE_DIR" - -# Copy code to package directory -cp -r core "$PACKAGE_DIR/" -cp -r plugins "$PACKAGE_DIR/" -cp -r custom_plugins "$PACKAGE_DIR/" 2>/dev/null || mkdir -p "$PACKAGE_DIR/custom_plugins" -cp -r server "$PACKAGE_DIR/" -cp requirements.txt "$PACKAGE_DIR/" 2>/dev/null || true - -# Install Python dependencies into package directory -echo "Installing Python dependencies..." - -# Prefer uv for faster, cached installs; fall back to pip if uv is unavailable -if command -v uv &> /dev/null; then - echo "Using uv to install dependencies..." - if ! 
uv pip install -r requirements.txt \ - --target "$PACKAGE_DIR/" \ - --python-platform x86_64-manylinux2014 \ - --python-version 3.11 \ - --no-compile; then - echo -e "${RED}❌ Error: Failed to install dependencies with uv${NC}" - exit 1 - fi -else - echo "uv not found, falling back to pip..." - if ! pip install -r requirements.txt -t "$PACKAGE_DIR/" --platform manylinux2014_x86_64 --only-binary :all: --no-compile --no-deps 2>/dev/null; then - echo "Platform-specific install failed, trying generic install..." - if ! pip install -r requirements.txt -t "$PACKAGE_DIR/" --no-compile 2>/dev/null; then - echo -e "${RED}❌ Error: Failed to install dependencies${NC}" - echo "Please ensure pip is available and requirements.txt is valid." - exit 1 - fi - fi -fi - -# Create zip file -ZIP_FILE="lambda-deployment.zip" -cd "$PACKAGE_DIR" -zip -r "../$ZIP_FILE" . > /dev/null -cd .. - -echo -e "${GREEN}✓ Lambda package created: $ZIP_FILE${NC}" -echo "" - -echo -e "${YELLOW}🏗️ Step 3: Deploying with Terraform...${NC}" - -# Copy zip file and config.yaml to Terraform module directory -cp "$ZIP_FILE" terraform/aws/lambda-deployment.zip -cp config.yaml terraform/aws/config.yaml - -# Initialize Terraform if needed -if [ ! -d "terraform/aws/.terraform" ]; then - echo "Initializing Terraform..." - cd terraform/aws - terraform init - cd ../.. -fi - -# Plan first - validates configuration and catches errors before any changes -cd terraform/aws - -echo "Selecting Terraform workspace: ${TF_WORKSPACE}" -terraform workspace select "$TF_WORKSPACE" 2>/dev/null || terraform workspace new "$TF_WORKSPACE" - -echo -e "${YELLOW}📋 Planning Terraform changes...${NC}" -if ! 
terraform plan \ - -out=tfplan \ - -var-file="${ENVIRONMENT}.tfvars" \ - -var="aws_region=$AWS_REGION" \ - -var="config_file=config.yaml"; then - echo -e "${RED}❌ Terraform plan failed - aborting deployment${NC}" - exit 1 -fi - -# Require explicit approval before deploying -echo "" -echo -e "${YELLOW}⚠️ Deployment will apply the planned changes to AWS.${NC}" -echo -e " Environment: ${ENVIRONMENT}" -echo -e " Workspace: ${TF_WORKSPACE}" -echo -e " Lambda: ${LAMBDA_NAME}" -echo -e " Region: ${AWS_REGION}" -echo "" -read -r -p "Do you want to proceed with deployment? (yes/no): " CONFIRM -if [ "$CONFIRM" != "yes" ] && [ "$CONFIRM" != "y" ]; then - echo -e "${YELLOW}Deployment cancelled by user.${NC}" - rm -f tfplan - exit 0 -fi -echo "" - -# Apply the planned changes -echo -e "${YELLOW}🚀 Applying Terraform changes...${NC}" -terraform apply tfplan -rm -f tfplan - -# Get URLs from Terraform output -LAMBDA_URL=$(terraform output -raw lambda_url 2>/dev/null || echo "") -API_GATEWAY_URL=$(terraform output -raw api_gateway_url 2>/dev/null || echo "") - -cd .. - -echo "" -echo -e "${GREEN}✅ Deployment complete!${NC}" -echo "" -echo "API Gateway URL (use for Claude Connectors):" -echo -e "${GREEN}$API_GATEWAY_URL${NC}" -echo "" -echo "Lambda Function URL (for direct HTTP testing):" -echo -e "${GREEN}$LAMBDA_URL${NC}" -echo "" -echo "Connect via Claude Connectors (same on Claude.ai and Claude Desktop):" -echo " 1. Go to Settings → Connectors (or Customize → Connectors on claude.ai)" -echo " 2. Click 'Add custom connector'" -echo " 3. 
Enter a name and URL: $API_GATEWAY_URL" -echo "" diff --git a/scripts/local_server.py b/scripts/local_server.py deleted file mode 100755 index 5db65ca..0000000 --- a/scripts/local_server.py +++ /dev/null @@ -1,228 +0,0 @@ -# run_local_server.py -"""Run OpenContext MCP server locally for testing (no Lambda needed).""" - -import asyncio -import json -import logging -import os -import sys -import time -import uuid -from pathlib import Path - -# Add project root to Python path so we can import from core -project_root = Path(__file__).parent.parent.resolve() -if str(project_root) not in sys.path: - sys.path.insert(0, str(project_root)) - -import yaml -from aiohttp import web - -from core.logging_utils import configure_json_logging -from core.mcp_server import MCPServer -from core.plugin_manager import PluginManager -from core.validators import get_logging_config - -logger = logging.getLogger(__name__) - -# Load config (OPENCONTEXT_CONFIG env var for tests; default config.yaml) -_config_path = os.environ.get("OPENCONTEXT_CONFIG", "config.yaml") -with open(_config_path) as f: - config = yaml.safe_load(f) - -# Configure JSON logging - use pretty format for local development -logging_config = get_logging_config(config) -configure_json_logging( - level=logging_config.get("level", "INFO"), - pretty=True, # Pretty-print JSON for better local readability -) - -# Global server instance -_plugin_manager = None -_mcp_server = None - - -async def init_server(): - """Initialize server on startup.""" - global _plugin_manager, _mcp_server - - print("🚀 Initializing OpenContext MCP Server locally...") - - # Initialize Plugin Manager - _plugin_manager = PluginManager(config) - await _plugin_manager.load_plugins() - - # Initialize MCP Server - _mcp_server = MCPServer(_plugin_manager) - - print("✅ Server initialized successfully") - print(f"Loaded plugins: {list(_plugin_manager.plugins.keys())}") - print(f"Available tools: {len(_plugin_manager.get_all_tools())}") - - -async def 
handle_mcp_request(request): - """Handle MCP JSON-RPC request.""" - start_time = time.perf_counter() - try: - body = await request.text() - headers = dict(request.headers) - - # Extract session ID from headers for logging - session_id = headers.get("mcp-session-id") or headers.get("Mcp-Session-Id") - - # Parse JSON to detect method and extract details for logging - try: - request_json = json.loads(body) - method = request_json.get("method", "unknown") - tool_name = None - tool_args = None - - if method == "tools/call": - params = request_json.get("params", {}) - tool_name = params.get("name") - tool_args = params.get("arguments", {}) - except (json.JSONDecodeError, AttributeError): - method = "unknown" - tool_name = None - tool_args = None - - # Log incoming request details - logger.info( - "Incoming MCP request", - extra={ - "session_id": session_id, - "method": method, - "tool_name": tool_name, - "tool_arguments": tool_args if tool_args else None, - }, - ) - - # Check if this is an initialize request - is_initialize = method == "initialize" - session_id_to_return = None - - if is_initialize: - session_id_to_return = str(uuid.uuid4()) - logger.info( - f"Initialize request detected, generating session ID: {session_id_to_return}" - ) - - # Use the same handler as Lambda - response = await _mcp_server.handle_http_request(body, headers) - - # Add session ID to response headers if this was an initialize request - response_headers = dict(response.get("headers", {})) - if session_id_to_return: - response_headers["Mcp-Session-Id"] = session_id_to_return - - # Calculate and log response time - duration_ms = (time.perf_counter() - start_time) * 1000 - logger.info( - "MCP request processed", - extra={ - "session_id": session_id_to_return or session_id, - "method": method, - "tool_name": tool_name, - "duration_ms": round(duration_ms, 2), - "status_code": response.get("statusCode", 200), - }, - ) - - return web.Response( - text=response.get("body", "{}"), - 
status=response.get("statusCode", 200), - headers=response_headers, - ) - - except Exception as e: - duration_ms = (time.perf_counter() - start_time) * 1000 - logger.error( - f"Error processing MCP request: {e}", - extra={"duration_ms": round(duration_ms, 2)}, - exc_info=True, - ) - return web.Response( - text=json.dumps( - { - "jsonrpc": "2.0", - "id": None, - "error": {"code": -32603, "message": str(e)}, - } - ), - status=500, - headers={"Content-Type": "application/json"}, - ) - - -async def start_server(): - """Start local HTTP server.""" - await init_server() - - app = web.Application() - app.router.add_post("/mcp", handle_mcp_request) - - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, "localhost", 8000) - await site.start() - - # Generate server name from config variables - server_name = None - if "plugins" in config: - # Try to get city_name from enabled plugin - for plugin_name, plugin_config in config["plugins"].items(): - if plugin_config.get("enabled"): - if "city_name" in plugin_config: - city_name = plugin_config["city_name"].lower().replace(" ", "-") - server_name = f"{city_name}-opendata" - break - elif "organization" in plugin_config: - org_name = plugin_config["organization"].lower().replace(" ", "-") - server_name = f"{org_name}-opendata" - break - - # Fallback to lambda_name or server_name from config - if not server_name: - if "aws" in config and "lambda_name" in config["aws"]: - lambda_name = config["aws"]["lambda_name"] - # Remove -mcp suffix if present - server_name = lambda_name.replace("-mcp", "") - elif "server_name" in config: - server_name = ( - config["server_name"].lower().replace(" ", "-").replace("'", "") - ) - - # Default fallback - if not server_name: - server_name = "opencontext-mcp" - - print("\n" + "=" * 50) - print("🌐 Local MCP Server running!") - print("=" * 50) - print("URL: http://localhost:8000/mcp") - print("\n" + "=" * 50) - print("📋 Connect via Claude Connectors") - print("=" * 50) - 
print("\n1. Go to Settings → Connectors (or Customize → Connectors on claude.ai)") - print("2. Click 'Add custom connector'") - print("3. Enter a name and URL: http://localhost:8000/mcp") - print("\nNote: Localhost works with Claude Desktop only (web needs a deployed URL).") - print("\n" + "=" * 50) - print("\nTest with:") - print(" ./scripts/test_streamable_http.sh") - print( - ' or curl -X POST http://localhost:8000/mcp -H \'Content-Type: application/json\' -d \'{"jsonrpc":"2.0","id":1,"method":"ping"}\'' - ) - print("\nPress Ctrl+C to stop") - print("=" * 50 + "\n") - - # Keep running - try: - await asyncio.Event().wait() - except KeyboardInterrupt: - print("\n👋 Shutting down...") - await _plugin_manager.shutdown() - - -if __name__ == "__main__": - asyncio.run(start_server()) diff --git a/scripts/setup-backend.sh b/scripts/setup-backend.sh deleted file mode 100755 index bc08592..0000000 --- a/scripts/setup-backend.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash -# S3 Backend Setup Script -# Creates backend.tf for Terraform remote state - -echo "Setting up S3 backend for Terraform..." - -# Get AWS account ID and region -AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null) -AWS_REGION=$(aws configure get region 2>/dev/null || echo "us-east-1") - -if [ -z "$AWS_ACCOUNT_ID" ]; then - echo "❌ Error: Could not get AWS account ID. Make sure AWS CLI is configured." - exit 1 -fi - -# Generate bucket name -BUCKET_NAME="opencontext-terraform-state-${AWS_ACCOUNT_ID}-${AWS_REGION}" -TABLE_NAME="terraform-state-lock" - -echo "AWS Account ID: $AWS_ACCOUNT_ID" -echo "AWS Region: $AWS_REGION" -echo "S3 Bucket: $BUCKET_NAME" -echo "DynamoDB Table: $TABLE_NAME" -echo "" - -# Create S3 bucket if it doesn't exist -echo "Creating S3 bucket..." -if ! 
aws s3api head-bucket --bucket "$BUCKET_NAME" 2>/dev/null; then - if [ "$AWS_REGION" = "us-east-1" ]; then - aws s3api create-bucket --bucket "$BUCKET_NAME" --region "$AWS_REGION" - else - aws s3api create-bucket --bucket "$BUCKET_NAME" --region "$AWS_REGION" --create-bucket-configuration LocationConstraint="$AWS_REGION" - fi - aws s3api put-bucket-versioning --bucket "$BUCKET_NAME" --versioning-configuration Status=Enabled - aws s3api put-bucket-encryption --bucket "$BUCKET_NAME" --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}' - echo "✅ S3 bucket created" -else - echo "✅ S3 bucket already exists" -fi - -# Create DynamoDB table if it doesn't exist -echo "Creating DynamoDB table..." -if ! aws dynamodb describe-table --table-name "$TABLE_NAME" --region "$AWS_REGION" 2>/dev/null; then - aws dynamodb create-table \ - --table-name "$TABLE_NAME" \ - --attribute-definitions AttributeName=LockID,AttributeType=S \ - --key-schema AttributeName=LockID,KeyType=HASH \ - --billing-mode PAY_PER_REQUEST \ - --region "$AWS_REGION" - echo "⏳ Waiting for table to be active..." - aws dynamodb wait table-exists --table-name "$TABLE_NAME" --region "$AWS_REGION" - echo "✅ DynamoDB table created" -else - echo "✅ DynamoDB table already exists" -fi - -# Create backend.tf -BACKEND_TF_PATH="terraform/aws/backend.tf" -cat > "$BACKEND_TF_PATH" < /dev/null; then - echo "Error: jq is required but not installed." 
- echo "Install with: brew install jq (macOS) or apt-get install jq (Linux)" - exit 1 -fi - -BASE_URL="${1:-http://localhost:8000/mcp}" - -echo "==========================================" -echo "Testing OpenContext MCP Server" -echo "==========================================" -echo "Base URL: $BASE_URL" -echo "" - -# Colors for output -GREEN='\033[0;32m' -BLUE='\033[0;34m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Step 1: Initialize and extract session ID -echo -e "${BLUE}Step 1: Initialize MCP connection${NC}" -INIT_REQUEST='{ - "jsonrpc": "2.0", - "id": 1, - "method": "initialize", - "params": { - "protocolVersion": "2025-03-26", - "capabilities": {}, - "clientInfo": { - "name": "test-client", - "version": "1.0.0" - } - } -}' - -echo "Request:" -echo "$INIT_REQUEST" | jq '.' -echo "" - -# Use temp files to capture response -TEMP_HEADERS=$(mktemp) -TEMP_BODY=$(mktemp) - -# Get response with headers separated -HTTP_CODE=$(curl -s -o "$TEMP_BODY" -w "%{http_code}" -X POST "$BASE_URL" \ - -H "Content-Type: application/json" \ - -d "$INIT_REQUEST" \ - -D "$TEMP_HEADERS") - -# Extract session ID from headers (case-insensitive) -SESSION_ID=$(grep -i "mcp-session-id" "$TEMP_HEADERS" | head -n 1 | sed -E 's/^[^:]*:[[:space:]]*(.*)$/\1/' | tr -d '\r' | tr -d '\n' || echo "") - -# Body is in TEMP_BODY -BODY=$(cat "$TEMP_BODY") - -if [ "$HTTP_CODE" != "200" ]; then - echo -e "${YELLOW}Error: HTTP $HTTP_CODE${NC}" - echo "$BODY" | jq '.' 2>/dev/null || echo "$BODY" - rm -f "$TEMP_RESPONSE" "$TEMP_HEADERS" - exit 1 -fi - -echo "Response:" -echo "$BODY" | jq '.' -echo "" - -rm -f "$TEMP_RESPONSE" "$TEMP_HEADERS" - -if [ -z "$SESSION_ID" ]; then - echo -e "${YELLOW}Warning: Could not extract session ID from headers${NC}" - echo "Continuing without session ID..." 
-else - echo -e "${GREEN}Session ID: $SESSION_ID${NC}" - echo "" -fi - -# Step 2: List tools using session ID -echo -e "${BLUE}Step 2: List available tools${NC}" -LIST_REQUEST='{ - "jsonrpc": "2.0", - "id": 2, - "method": "tools/list" -}' - -echo "Request:" -echo "$LIST_REQUEST" | jq '.' -echo "" - -if [ -n "$SESSION_ID" ]; then - LIST_RESPONSE=$(curl -s -X POST "$BASE_URL" \ - -H "Content-Type: application/json" \ - -H "Mcp-Session-Id: $SESSION_ID" \ - -d "$LIST_REQUEST") -else - LIST_RESPONSE=$(curl -s -X POST "$BASE_URL" \ - -H "Content-Type: application/json" \ - -d "$LIST_REQUEST") -fi - -echo "Response:" -echo "$LIST_RESPONSE" | jq '.' -echo "" - -# Extract tool names -TOOL_COUNT=$(echo "$LIST_RESPONSE" | jq '.result.tools | length') -echo -e "${GREEN}Found $TOOL_COUNT tools${NC}" -echo "" - -# Step 3: Call a tool (ckan__search_datasets) -echo -e "${BLUE}Step 3: Call tool (ckan__search_datasets)${NC}" -CALL_REQUEST='{ - "jsonrpc": "2.0", - "id": 3, - "method": "tools/call", - "params": { - "name": "ckan__search_datasets", - "arguments": { - "query": "traffic", - "limit": 5 - } - } -}' - -echo "Request:" -echo "$CALL_REQUEST" | jq '.' -echo "" - -if [ -n "$SESSION_ID" ]; then - CALL_RESPONSE=$(curl -s -X POST "$BASE_URL" \ - -H "Content-Type: application/json" \ - -H "Mcp-Session-Id: $SESSION_ID" \ - -d "$CALL_REQUEST") -else - CALL_RESPONSE=$(curl -s -X POST "$BASE_URL" \ - -H "Content-Type: application/json" \ - -d "$CALL_REQUEST") -fi - -echo "Response:" -echo "$CALL_RESPONSE" | jq '.' 
-echo "" - -# Check for errors -ERROR=$(echo "$CALL_RESPONSE" | jq -r '.error // empty') -if [ -n "$ERROR" ]; then - echo -e "${YELLOW}Tool call returned an error:${NC}" - echo "$CALL_RESPONSE" | jq '.error' - exit 1 -else - echo -e "${GREEN}Tool call successful!${NC}" -fi - -echo "" -echo "==========================================" -echo -e "${GREEN}All tests passed!${NC}" -echo "==========================================" diff --git a/terraform/README.md b/terraform/README.md index 689043e..2dcc500 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -16,13 +16,13 @@ terraform apply ### Deploy OpenContext -Use the main deploy script (recommended): +Use the CLI (recommended): ```bash -./scripts/deploy.sh +opencontext deploy --env staging ``` -Or deploy manually: +Or deploy manually via Terraform: ```bash cd aws @@ -35,7 +35,7 @@ terraform apply ### Alternative: Per-account backend -Use [scripts/setup-backend.sh](../scripts/setup-backend.sh) to create a per-account S3 bucket and DynamoDB table, then generate `terraform/aws/backend.tf` with custom bucket name. Use this if you need a separate state bucket per AWS account. +Use `opencontext configure --state-bucket ` to create a per-account S3 bucket and generate `terraform/aws/backend.tf` with a custom bucket name. Use this if you need a separate state bucket per AWS account. ## Other Clouds diff --git a/terraform/aws/main.tf b/terraform/aws/main.tf index c57b6e2..7057ec5 100644 --- a/terraform/aws/main.tf +++ b/terraform/aws/main.tf @@ -77,8 +77,8 @@ resource "aws_iam_role_policy_attachment" "lambda_xray" { policy_arn = "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess" } -# Lambda deployment package (created by scripts/deploy.sh) -# deploy.sh builds .deploy/ and lambda-deployment.zip, then copies the zip here. +# Lambda deployment package (created by `opencontext deploy`) +# `opencontext deploy` builds .deploy/ and lambda-deployment.zip, then copies the zip here. 
locals { lambda_zip_path = "${path.module}/lambda-deployment.zip" lambda_zip_hash = filebase64sha256(local.lambda_zip_path) diff --git a/terraform/bootstrap/README.md b/terraform/bootstrap/README.md index 9f82f93..fad8381 100644 --- a/terraform/bootstrap/README.md +++ b/terraform/bootstrap/README.md @@ -1,6 +1,8 @@ # Terraform Backend Bootstrap -Creates the S3 bucket and DynamoDB table used by the main OpenContext Terraform configuration for remote state storage. +Creates the S3 bucket used by the main OpenContext Terraform configuration for remote state storage. + +> **Note:** For most users, `opencontext configure` automatically creates the default `opencontext-terraform-state` S3 bucket. This bootstrap module is only needed if you want a custom per-account bucket name instead of that default. ## Prerequisites @@ -20,7 +22,6 @@ terraform apply This creates: - **S3 bucket** `opencontext-terraform-state` – stores Terraform state with versioning and encryption -- **DynamoDB table** `terraform-state-lock` – available for state locking (optional) **Note:** The main `terraform/aws` configuration uses the S3 bucket for state. The bucket name must match the `bucket` in `terraform/aws/main.tf` backend block. 
@@ -30,7 +31,6 @@ This creates: |----------|---------|-------------| | `aws_region` | `us-east-1` | AWS region for backend resources | | `state_bucket_name` | `opencontext-terraform-state` | S3 bucket name (must match `terraform/aws/main.tf`) | -| `lock_table_name` | `terraform-state-lock` | DynamoDB table name | ## After Bootstrap @@ -43,4 +43,4 @@ terraform plan -var="config_file=config.yaml" terraform apply ``` -Or use the deploy script from the project root: `./scripts/deploy.sh` +Or use the CLI from the project root: `opencontext deploy --env staging` diff --git a/terraform/bootstrap/main.tf b/terraform/bootstrap/main.tf index e02311e..98a42dd 100644 --- a/terraform/bootstrap/main.tf +++ b/terraform/bootstrap/main.tf @@ -1,5 +1,5 @@ # Bootstrap Terraform Backend -# Creates S3 bucket and DynamoDB table for Terraform state storage and locking. +# Creates the S3 bucket used by the main OpenContext Terraform configuration for remote state storage. # Run this once with local backend before using the main AWS configuration. 
# # Usage: @@ -53,19 +53,3 @@ resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" } } } - -# DynamoDB table for state locking -resource "aws_dynamodb_table" "terraform_lock" { - name = var.lock_table_name - billing_mode = "PAY_PER_REQUEST" - hash_key = "LockID" - - attribute { - name = "LockID" - type = "S" - } - - lifecycle { - prevent_destroy = true - } -} diff --git a/terraform/bootstrap/outputs.tf b/terraform/bootstrap/outputs.tf index 32e1521..fc42952 100644 --- a/terraform/bootstrap/outputs.tf +++ b/terraform/bootstrap/outputs.tf @@ -2,8 +2,3 @@ output "state_bucket_name" { description = "Name of the S3 bucket for Terraform state" value = aws_s3_bucket.terraform_state.id } - -output "lock_table_name" { - description = "Name of the DynamoDB table for state locking" - value = aws_dynamodb_table.terraform_lock.name -} diff --git a/terraform/bootstrap/variables.tf b/terraform/bootstrap/variables.tf index ac4ab82..625c8c2 100644 --- a/terraform/bootstrap/variables.tf +++ b/terraform/bootstrap/variables.tf @@ -9,9 +9,3 @@ variable "state_bucket_name" { type = string default = "opencontext-terraform-state" } - -variable "lock_table_name" { - description = "Name of the DynamoDB table for state locking" - type = string - default = "terraform-state-lock" -} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ee4abd5 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,54 @@ +"""Shared pytest fixtures and sys.modules stubs for the OpenContext test suite. + +boto3 and botocore are listed as project dependencies in pyproject.toml and will +be present in any standard install (``uv sync --all-extras``). The lightweight +stubs below act as a fallback for minimal/partial installs that omit the AWS SDK +— they allow import-time resolution of CLI modules that reference boto3 at the +top level so the rest of the test suite can still run. 
+""" + +from __future__ import annotations + +import sys +import types +from unittest.mock import MagicMock + + +def _make_boto3_stub() -> types.ModuleType: + """Return a minimal boto3 stub sufficient for import-time resolution.""" + boto3_mod = types.ModuleType("boto3") + + # boto3.client() returns a MagicMock by default — tests that care about + # specific S3 responses should patch cli.commands.configure._ensure_state_bucket + # or boto3.client directly in their own setUp / patch context. + boto3_mod.client = MagicMock(return_value=MagicMock()) + return boto3_mod + + +def _make_botocore_stub() -> types.ModuleType: + """Return a minimal botocore stub that satisfies `import botocore.exceptions`.""" + botocore_mod = types.ModuleType("botocore") + + exceptions_mod = types.ModuleType("botocore.exceptions") + + class ClientError(Exception): + def __init__(self, error_response: dict, operation_name: str) -> None: + self.response = error_response + super().__init__(str(error_response)) + + exceptions_mod.ClientError = ClientError # type: ignore[attr-defined] + botocore_mod.exceptions = exceptions_mod # type: ignore[attr-defined] + + return botocore_mod, exceptions_mod + + +# Install stubs only when the real packages are absent so that environments +# that have boto3 installed (e.g. CI with full deps) continue to use the real +# library. 
+if "boto3" not in sys.modules: + sys.modules["boto3"] = _make_boto3_stub() + +if "botocore" not in sys.modules: + botocore_stub, botocore_exceptions_stub = _make_botocore_stub() + sys.modules["botocore"] = botocore_stub + sys.modules["botocore.exceptions"] = botocore_exceptions_stub diff --git a/tests/test_cli_configure.py b/tests/test_cli_configure.py index 5b0cf30..bbba90d 100644 --- a/tests/test_cli_configure.py +++ b/tests/test_cli_configure.py @@ -1,5 +1,9 @@ """Tests for CLI configure command — file-writing helpers and config generation.""" +import subprocess +from unittest.mock import MagicMock, patch + +import pytest import yaml @@ -242,3 +246,445 @@ def test_arcgis_config_keys(self): assert "portal_url" in result assert "city_name" in result assert "timeout" in result + + +# --------------------------------------------------------------------------- +# _ensure_state_bucket +# --------------------------------------------------------------------------- + + +def _make_client_error(code: str) -> "botocore.exceptions.ClientError": # noqa: F821 + """Build a ClientError using whatever ClientError class is in sys.modules.""" + import sys + + ClientError = sys.modules["botocore.exceptions"].ClientError + return ClientError({"Error": {"Code": code, "Message": "test"}}, "HeadBucket") + + +class TestEnsureStateBucket: + """Unit tests for _ensure_state_bucket — all S3 calls are mocked.""" + + # ------------------------------------------------------------------ + # Scenario 1: bucket already exists + # ------------------------------------------------------------------ + + def test_existing_bucket_returns_early_without_create(self, capsys): + """head_bucket succeeds → create_bucket is never called.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + # head_bucket returns normally (bucket exists) + mock_s3.head_bucket.return_value = {} + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + 
_ensure_state_bucket("my-bucket", "us-east-1") + + mock_s3.create_bucket.assert_not_called() + mock_s3.put_bucket_versioning.assert_not_called() + mock_s3.put_bucket_encryption.assert_not_called() + + def test_existing_bucket_prints_already_exists_message(self, capsys): + """When the bucket exists the function prints a status line containing the name.""" + + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.return_value = {} + + output_lines: list[str] = [] + # console.print is a rich Console — capture via patching + with ( + patch("cli.commands.configure.boto3.client", return_value=mock_s3), + patch( + "cli.commands.configure.console.print", + side_effect=lambda *a, **kw: output_lines.append(str(a[0])), + ), + ): + _ensure_state_bucket("my-state-bucket", "us-east-1") + + assert any("my-state-bucket" in line for line in output_lines), ( + f"Expected bucket name in output, got: {output_lines}" + ) + assert any("already exists" in line for line in output_lines), ( + f"Expected 'already exists' in output, got: {output_lines}" + ) + + # ------------------------------------------------------------------ + # Scenario 2: bucket missing in us-east-1 + # ------------------------------------------------------------------ + + def test_missing_bucket_us_east_1_creates_without_location_constraint(self): + """404 ClientError in us-east-1 → create_bucket called with no LocationConstraint.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("new-bucket", "us-east-1") + + mock_s3.create_bucket.assert_called_once_with(Bucket="new-bucket") + # No CreateBucketConfiguration kwarg + _, kwargs = mock_s3.create_bucket.call_args + assert "CreateBucketConfiguration" not in kwargs + + def test_missing_bucket_us_east_1_no_such_bucket_code(self): 
+ """NoSuchBucket error code is also treated as missing → bucket is created.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("NoSuchBucket") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("new-bucket", "us-east-1") + + mock_s3.create_bucket.assert_called_once() + + def test_missing_bucket_us_east_1_enables_versioning(self): + """After creating the bucket in us-east-1, versioning is enabled.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("new-bucket", "us-east-1") + + mock_s3.put_bucket_versioning.assert_called_once_with( + Bucket="new-bucket", + VersioningConfiguration={"Status": "Enabled"}, + ) + + def test_missing_bucket_us_east_1_enables_aes256_encryption(self): + """After creating the bucket in us-east-1, AES256 server-side encryption is set.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("new-bucket", "us-east-1") + + mock_s3.put_bucket_encryption.assert_called_once() + _, kwargs = mock_s3.put_bucket_encryption.call_args + rules = kwargs["ServerSideEncryptionConfiguration"]["Rules"] + assert len(rules) == 1 + assert ( + rules[0]["ApplyServerSideEncryptionByDefault"]["SSEAlgorithm"] == "AES256" + ) + + def test_missing_bucket_us_east_1_call_order(self): + """create_bucket → put_bucket_versioning → put_bucket_encryption (in that order).""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with 
patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("ordered-bucket", "us-east-1") + + method_names = [c[0] for c in mock_s3.method_calls] + create_idx = method_names.index("create_bucket") + versioning_idx = method_names.index("put_bucket_versioning") + encryption_idx = method_names.index("put_bucket_encryption") + + assert create_idx < versioning_idx < encryption_idx, ( + f"Expected create < versioning < encryption, got indices " + f"{create_idx}, {versioning_idx}, {encryption_idx}" + ) + + # ------------------------------------------------------------------ + # Scenario 3: bucket missing in non-us-east-1 + # ------------------------------------------------------------------ + + def test_missing_bucket_non_us_east_1_creates_with_location_constraint(self): + """In us-west-2, create_bucket must include CreateBucketConfiguration.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("west-bucket", "us-west-2") + + mock_s3.create_bucket.assert_called_once_with( + Bucket="west-bucket", + CreateBucketConfiguration={"LocationConstraint": "us-west-2"}, + ) + + def test_missing_bucket_eu_west_1_uses_correct_region_constraint(self): + """LocationConstraint must match the region argument exactly.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("eu-bucket", "eu-west-1") + + _, kwargs = mock_s3.create_bucket.call_args + assert kwargs["CreateBucketConfiguration"]["LocationConstraint"] == "eu-west-1" + + def test_missing_bucket_non_us_east_1_still_enables_versioning_and_encryption(self): + """Versioning and encryption are enabled regardless of 
region.""" + from cli.commands.configure import _ensure_state_bucket + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = _make_client_error("404") + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + _ensure_state_bucket("west-bucket", "us-west-2") + + mock_s3.put_bucket_versioning.assert_called_once() + mock_s3.put_bucket_encryption.assert_called_once() + + # ------------------------------------------------------------------ + # Scenario 4: non-404 ClientError is re-raised + # ------------------------------------------------------------------ + + def test_permission_denied_error_is_reraised(self): + """A 403 AccessDenied ClientError must propagate — never swallowed.""" + import sys + + from cli.commands.configure import _ensure_state_bucket + + ClientError = sys.modules["botocore.exceptions"].ClientError + access_denied = _make_client_error("403") + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = access_denied + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + with pytest.raises(ClientError): + _ensure_state_bucket("locked-bucket", "us-east-1") + + mock_s3.create_bucket.assert_not_called() + + def test_unknown_client_error_is_reraised(self): + """Any unrecognised error code (e.g. 
500) is re-raised unchanged.""" + import sys + + from cli.commands.configure import _ensure_state_bucket + + ClientError = sys.modules["botocore.exceptions"].ClientError + server_error = _make_client_error("500") + + mock_s3 = MagicMock() + mock_s3.head_bucket.side_effect = server_error + + with patch("cli.commands.configure.boto3.client", return_value=mock_s3): + with pytest.raises(ClientError): + _ensure_state_bucket("any-bucket", "us-east-1") + + # ------------------------------------------------------------------ + # Scenario 5: _ensure_state_bucket called before terraform init + # ------------------------------------------------------------------ + + @patch("cli.commands.configure.subprocess.run") + @patch("cli.commands.configure.run_cmd") + @patch("cli.commands.configure.get_project_root") + @patch("cli.commands.configure.get_terraform_dir") + @patch("cli.commands.configure.questionary") + def test_ensure_bucket_called_before_terraform_init( + self, + mock_q, + mock_tf_dir, + mock_root, + mock_run_cmd, + mock_subproc, + tmp_path, + ): + """_ensure_state_bucket must be invoked before terraform init runs.""" + from cli.commands.configure import configure + + tf_dir = tmp_path / "terraform" / "aws" + tf_dir.mkdir(parents=True) + # No .terraform dir → init will run + mock_root.return_value = tmp_path + mock_tf_dir.return_value = tf_dir + + def _make_q_helper(responses: list): + it = iter(responses) + + def _side(*a, **kw): + m = MagicMock() + m.ask.return_value = next(it) + return m + + return _side + + mock_q.select.side_effect = _make_q_helper( + ["Start from scratch", "staging", "CKAN"] + ) + mock_q.text.side_effect = _make_q_helper( + [ + "Org", + "City", + "https://data.example.gov", + "https://data.example.gov", + "City", + "120", + "us-east-1", + "city-mcp-staging", + "512", + "120", + ] + ) + mock_q.confirm.side_effect = _make_q_helper([False]) + + mock_subproc.return_value = subprocess.CompletedProcess( + args=[], returncode=0, stdout=" default\n", 
stderr="" + ) + mock_run_cmd.return_value = MagicMock(returncode=0) + + call_order: list[str] = [] + + mock_s3 = MagicMock() + mock_s3.head_bucket.return_value = {} # bucket exists — no creation + + def _record_s3_call(*a, **kw): + call_order.append("ensure_state_bucket") + return mock_s3 + + def _record_run_cmd(cmd, **kw): + call_order.append("run_cmd:" + " ".join(str(c) for c in cmd)) + return MagicMock(returncode=0) + + mock_run_cmd.side_effect = _record_run_cmd + + with patch("cli.commands.configure.boto3.client", side_effect=_record_s3_call): + configure() + + # The first recorded event must be the S3 client creation (bucket check), + # which happens inside _ensure_state_bucket, before any terraform commands. + assert "ensure_state_bucket" in call_order, ( + "boto3.client was never called — _ensure_state_bucket may not have run" + ) + init_indices = [i for i, e in enumerate(call_order) if "init" in e] + bucket_index = call_order.index("ensure_state_bucket") + + if init_indices: + first_init = min(init_indices) + assert bucket_index < first_init, ( + f"_ensure_state_bucket (index {bucket_index}) must run before " + f"terraform init (index {first_init}). Order: {call_order}" + ) + + +# --------------------------------------------------------------------------- +# --state-bucket flag +# --------------------------------------------------------------------------- + + +def _make_q_helper(responses: list): + """Return a side_effect function that returns MagicMocks with .ask() values.""" + it = iter(responses) + + def _side(*a, **kw): + m = MagicMock() + m.ask.return_value = next(it) + return m + + return _side + + +def _run_configure_wizard(tmp_path, extra_kwargs: dict | None = None): + """Run the configure wizard with a standard CKAN flow. + + extra_kwargs are forwarded to configure() (e.g. state_bucket="custom-bucket"). + Returns a tuple of (run_cmd call_args_list, the mocked S3 client). 
+ """ + from cli.commands.configure import configure + + tf_dir = tmp_path / "terraform" / "aws" + tf_dir.mkdir(parents=True) + + mock_s3 = MagicMock() + mock_s3.head_bucket.return_value = {} + + with ( + patch("cli.commands.configure.get_project_root", return_value=tmp_path), + patch("cli.commands.configure.get_terraform_dir", return_value=tf_dir), + patch("cli.commands.configure.boto3.client", return_value=mock_s3), + patch("cli.commands.configure.questionary") as mock_q, + patch("cli.commands.configure.subprocess.run") as mock_subproc, + patch("cli.commands.configure.run_cmd") as mock_run_cmd, + ): + mock_q.select.side_effect = _make_q_helper( + ["Start from scratch", "staging", "CKAN"] + ) + mock_q.text.side_effect = _make_q_helper( + [ + "Org", + "City", + "https://data.example.gov", + "https://data.example.gov", + "City", + "120", + "us-east-1", + "city-mcp-staging", + "512", + "120", + ] + ) + mock_q.confirm.side_effect = _make_q_helper([False]) + mock_subproc.return_value = subprocess.CompletedProcess( + args=[], returncode=0, stdout=" default\n", stderr="" + ) + mock_run_cmd.return_value = MagicMock(returncode=0) + + configure(**(extra_kwargs or {})) + + return mock_run_cmd.call_args_list, mock_s3 + + +class TestStateBucketFlag: + """Tests for the --state-bucket CLI option on configure().""" + + def test_default_bucket_passed_to_ensure_state_bucket(self, tmp_path): + """When --state-bucket is not provided, _ensure_state_bucket uses the default name.""" + from cli.commands.configure import TERRAFORM_STATE_BUCKET + + _, mock_s3 = _run_configure_wizard(tmp_path) + + # boto3.client is called inside _ensure_state_bucket; head_bucket receives + # the bucket name via the Bucket kwarg. 
+ mock_s3.head_bucket.assert_called_once_with(Bucket=TERRAFORM_STATE_BUCKET) + + def test_custom_bucket_passed_to_ensure_state_bucket(self, tmp_path): + """When --state-bucket is provided, _ensure_state_bucket receives the custom name.""" + custom = "my-custom-tf-state" + _, mock_s3 = _run_configure_wizard(tmp_path, {"state_bucket": custom}) + + mock_s3.head_bucket.assert_called_once_with(Bucket=custom) + + def test_default_bucket_passes_backend_config_to_init(self, tmp_path): + """With the default bucket, terraform init includes -backend-config for bucket and region.""" + from cli.commands.configure import TERRAFORM_STATE_BUCKET + + calls, _ = _run_configure_wizard(tmp_path) + init_calls = [c for c in calls if "init" in str(c)] + assert init_calls, "Expected at least one terraform init call" + cmd_args = init_calls[0][0][0] + assert any( + f"-backend-config=bucket={TERRAFORM_STATE_BUCKET}" in arg + for arg in cmd_args + ), f"Expected -backend-config=bucket in terraform init; got: {cmd_args}" + assert any("-backend-config=region=" in arg for arg in cmd_args), ( + f"Expected -backend-config=region in terraform init; got: {cmd_args}" + ) + + def test_custom_bucket_adds_backend_config_to_init(self, tmp_path): + """With a custom bucket, terraform init is called WITH -backend-config=bucket=<custom>.""" + custom = "my-custom-tf-state" + calls, _ = _run_configure_wizard(tmp_path, {"state_bucket": custom}) + init_calls = [c for c in calls if "init" in str(c)] + assert init_calls, "Expected at least one terraform init call" + cmd_args = init_calls[0][0][0] + assert any(f"-backend-config=bucket={custom}" in arg for arg in cmd_args), ( + f"Expected -backend-config=bucket={custom} in a terraform init call; " + f"init calls were: {init_calls}" + ) + assert any("-backend-config=region=" in arg for arg in cmd_args), ( + f"Expected -backend-config=region in terraform init; got: {cmd_args}" + ) diff --git a/tests/test_cli_serve.py b/tests/test_cli_serve.py new file mode 100644 index 
0000000..b5d955b --- /dev/null +++ b/tests/test_cli_serve.py @@ -0,0 +1,407 @@ +"""Tests for CLI serve command.""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import typer +import yaml + +from cli.commands.serve import ( + _derive_server_name, + _load_config, + _run_server, + serve, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def config_file(tmp_path: Path) -> Path: + """Write a minimal valid config.yaml to tmp_path and return its path.""" + cfg = { + "server_name": "Test MCP", + "plugins": { + "ckan": { + "enabled": True, + "base_url": "https://data.example.gov", + "city_name": "Testville", + "timeout": 30, + } + }, + "logging": {"level": "INFO", "format": "json"}, + } + path = tmp_path / "config.yaml" + path.write_text(yaml.dump(cfg)) + return path + + +# --------------------------------------------------------------------------- +# _load_config +# --------------------------------------------------------------------------- + + +class TestLoadConfig: + def test_loads_valid_yaml(self, config_file: Path) -> None: + cfg, resolved = _load_config(str(config_file)) + assert cfg["server_name"] == "Test MCP" + assert resolved == config_file.resolve() + + def test_missing_file_raises_exit(self, tmp_path: Path) -> None: + with pytest.raises(typer.Exit): + _load_config(str(tmp_path / "nonexistent.yaml")) + + +# --------------------------------------------------------------------------- +# _derive_server_name +# --------------------------------------------------------------------------- + + +class TestDeriveServerName: + def test_uses_city_name_from_enabled_plugin(self) -> None: + config = { + "plugins": { + "ckan": {"enabled": True, "city_name": "New York"}, + } + } + assert _derive_server_name(config) == 
"new-york-opendata" + + def test_uses_organization_when_no_city_name(self) -> None: + config = { + "plugins": { + "arcgis": {"enabled": True, "organization": "County GIS"}, + } + } + assert _derive_server_name(config) == "county-gis-opendata" + + def test_skips_disabled_plugins(self) -> None: + config = { + "plugins": { + "ckan": {"enabled": False, "city_name": "Boston"}, + }, + "server_name": "Fallback Server", + } + result = _derive_server_name(config) + assert result == "fallback-server" + + def test_falls_back_to_lambda_name(self) -> None: + config = {"aws": {"lambda_name": "my-city-mcp"}} + assert _derive_server_name(config) == "my-city" + + def test_falls_back_to_server_name(self) -> None: + config = {"server_name": "Boston OpenData MCP"} + assert _derive_server_name(config) == "boston-opendata-mcp" + + def test_default_fallback(self) -> None: + assert _derive_server_name({}) == "opencontext-mcp" + + +# --------------------------------------------------------------------------- +# serve — default port (8000) +# --------------------------------------------------------------------------- + + +class TestServeDefaultPort: + def test_default_port_is_8000(self, config_file: Path) -> None: + """When --port is not given, 8000 is passed to _run_server.""" + captured: dict = {} + + async def _fake_run(config: dict, port: int) -> None: + captured["port"] = port + + mock_run_server = AsyncMock(side_effect=_fake_run) + + with ( + patch("cli.commands.serve._load_config", return_value=({}, config_file)), + patch("cli.commands.serve._run_server", mock_run_server), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=8000, config=str(config_file)) + + assert captured.get("port") == 8000 + + +# --------------------------------------------------------------------------- +# serve — default config path +# --------------------------------------------------------------------------- + + +class TestServeDefaultConfigPath: + def 
test_default_config_path_is_config_yaml( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + """When --config is empty and OPENCONTEXT_CONFIG not set, 'config.yaml' is used.""" + monkeypatch.delenv("OPENCONTEXT_CONFIG", raising=False) + + captured: dict = {} + + def _fake_load(path: str) -> tuple[dict, Path]: + captured["path"] = path + return {}, tmp_path / "config.yaml" + + with ( + patch("cli.commands.serve._load_config", side_effect=_fake_load), + patch("asyncio.run"), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=8000, config="") + + assert captured["path"] == "config.yaml" + + +# --------------------------------------------------------------------------- +# serve — --port flag +# --------------------------------------------------------------------------- + + +class TestServePortFlag: + def test_custom_port_passed_to_run_server(self, config_file: Path) -> None: + captured: dict = {} + + async def _fake_run(config: dict, port: int) -> None: + captured["port"] = port + + mock_run_server = AsyncMock(side_effect=_fake_run) + + with ( + patch("cli.commands.serve._load_config", return_value=({}, config_file)), + patch("cli.commands.serve._run_server", mock_run_server), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=9090, config=str(config_file)) + + assert captured.get("port") == 9090 + + +# --------------------------------------------------------------------------- +# serve — --config flag +# --------------------------------------------------------------------------- + + +class TestServeConfigFlag: + def test_explicit_config_path_is_used(self, config_file: Path) -> None: + captured: dict = {} + + def _fake_load(path: str) -> tuple[dict, Path]: + captured["path"] = path + return {}, config_file + + with ( + patch("cli.commands.serve._load_config", side_effect=_fake_load), + patch("cli.commands.serve._run_server", AsyncMock()), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None 
+ serve(ctx=ctx, port=8000, config=str(config_file)) + + assert captured["path"] == str(config_file) + + +# --------------------------------------------------------------------------- +# serve — OPENCONTEXT_CONFIG env var +# --------------------------------------------------------------------------- + + +class TestServeEnvVar: + def test_env_var_used_when_config_flag_absent( + self, config_file: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv("OPENCONTEXT_CONFIG", str(config_file)) + + captured: dict = {} + + def _fake_load(path: str) -> tuple[dict, Path]: + captured["path"] = path + return {}, config_file + + with ( + patch("cli.commands.serve._load_config", side_effect=_fake_load), + patch("cli.commands.serve._run_server", AsyncMock()), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=8000, config="") + + assert captured["path"] == str(config_file) + + def test_explicit_config_flag_overrides_env_var( + self, config_file: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + other = tmp_path / "other.yaml" + other.write_text(yaml.dump({"server_name": "other"})) + monkeypatch.setenv("OPENCONTEXT_CONFIG", str(config_file)) + + captured: dict = {} + + def _fake_load(path: str) -> tuple[dict, Path]: + captured["path"] = path + return {}, other + + with ( + patch("cli.commands.serve._load_config", side_effect=_fake_load), + patch("cli.commands.serve._run_server", AsyncMock()), + ): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=8000, config=str(other)) + + assert captured["path"] == str(other) + + +# --------------------------------------------------------------------------- +# _run_server — startup initialises PluginManager and MCPServer +# --------------------------------------------------------------------------- + + +def _make_aiohttp_mocks(): + """Return a dict of mocks needed to stub out aiohttp in _run_server.""" + mock_app = MagicMock() + mock_app.router.add_post = 
MagicMock() + + mock_runner = AsyncMock() + mock_site = AsyncMock() + + mock_event_instance = MagicMock() + mock_event_instance.wait = AsyncMock(side_effect=KeyboardInterrupt) + + return { + "app": mock_app, + "runner": mock_runner, + "site": mock_site, + "event_instance": mock_event_instance, + } + + +class TestRunServerStartup: + def _make_plugin_manager(self) -> MagicMock: + pm = MagicMock() + pm.load_plugins = AsyncMock() + pm.shutdown = AsyncMock() + pm.plugins = {"ckan": MagicMock()} + pm.get_all_tools.return_value = [MagicMock(), MagicMock()] + return pm + + def test_plugin_manager_load_plugins_called(self, config_file: Path) -> None: + config = yaml.safe_load(config_file.read_text()) + pm = self._make_plugin_manager() + mocks = _make_aiohttp_mocks() + + with ( + patch("cli.commands.serve.configure_json_logging"), + patch( + "cli.commands.serve.get_logging_config", return_value={"level": "INFO"} + ), + patch("cli.commands.serve.PluginManager", return_value=pm), + patch("cli.commands.serve.MCPServer"), + patch("cli.commands.serve.web.Application", return_value=mocks["app"]), + patch("cli.commands.serve.web.AppRunner", return_value=mocks["runner"]), + patch("cli.commands.serve.web.TCPSite", return_value=mocks["site"]), + patch( + "cli.commands.serve.asyncio.Event", return_value=mocks["event_instance"] + ), + ): + asyncio.run(_run_server(config, port=8000)) + + pm.load_plugins.assert_awaited_once() + + def test_mcp_server_initialised_with_plugin_manager( + self, config_file: Path + ) -> None: + config = yaml.safe_load(config_file.read_text()) + pm = self._make_plugin_manager() + mocks = _make_aiohttp_mocks() + + with ( + patch("cli.commands.serve.configure_json_logging"), + patch( + "cli.commands.serve.get_logging_config", return_value={"level": "INFO"} + ), + patch("cli.commands.serve.PluginManager", return_value=pm), + patch("cli.commands.serve.MCPServer") as mock_mcp_cls, + patch("cli.commands.serve.web.Application", return_value=mocks["app"]), + 
patch("cli.commands.serve.web.AppRunner", return_value=mocks["runner"]), + patch("cli.commands.serve.web.TCPSite", return_value=mocks["site"]), + patch( + "cli.commands.serve.asyncio.Event", return_value=mocks["event_instance"] + ), + ): + asyncio.run(_run_server(config, port=8000)) + + mock_mcp_cls.assert_called_once_with(pm) + + +# --------------------------------------------------------------------------- +# _run_server — graceful shutdown calls plugin_manager.shutdown() +# --------------------------------------------------------------------------- + + +class TestRunServerShutdown: + def test_shutdown_called_on_keyboard_interrupt(self, config_file: Path) -> None: + config = yaml.safe_load(config_file.read_text()) + pm = MagicMock() + pm.load_plugins = AsyncMock() + pm.shutdown = AsyncMock() + pm.plugins = {} + pm.get_all_tools.return_value = [] + mocks = _make_aiohttp_mocks() + + with ( + patch("cli.commands.serve.configure_json_logging"), + patch( + "cli.commands.serve.get_logging_config", return_value={"level": "INFO"} + ), + patch("cli.commands.serve.PluginManager", return_value=pm), + patch("cli.commands.serve.MCPServer"), + patch("cli.commands.serve.web.Application", return_value=mocks["app"]), + patch("cli.commands.serve.web.AppRunner", return_value=mocks["runner"]), + patch("cli.commands.serve.web.TCPSite", return_value=mocks["site"]), + patch( + "cli.commands.serve.asyncio.Event", return_value=mocks["event_instance"] + ), + ): + asyncio.run(_run_server(config, port=8000)) + + pm.shutdown.assert_awaited_once() + + +# --------------------------------------------------------------------------- +# serve — config file not found produces clear error +# --------------------------------------------------------------------------- + + +class TestServeConfigNotFound: + def test_missing_config_exits_with_error( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv("OPENCONTEXT_CONFIG", raising=False) + + with 
pytest.raises(typer.Exit): + ctx = MagicMock() + ctx.invoked_subcommand = None + serve(ctx=ctx, port=8000, config=str(tmp_path / "missing.yaml")) + + +# --------------------------------------------------------------------------- +# Registration in cli/main.py +# --------------------------------------------------------------------------- + + +class TestServeRegistered: + def test_serve_typer_registered_in_app(self) -> None: + """serve_app must be mounted under the 'serve' name in the main app.""" + from cli.main import app + + group_names = [g.name or "" for g in app.registered_groups] + assert "serve" in group_names, ( + f"'serve' not found in registered groups: {group_names}" + ) diff --git a/uv.lock b/uv.lock index 6fc1bb5..1dab30c 100644 --- a/uv.lock +++ b/uv.lock @@ -175,6 +175,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/ca/78d423b324b8d77900030fa59c4aa9054261ef0925631cd2501dd015b7b7/boolean_py-5.0-py3-none-any.whl", hash = "sha256:ef28a70bd43115208441b53a045d1549e2f0ec6e3d08a9d142cbc41c1938e8d9", size = 26577, upload-time = "2025-04-03T10:39:48.449Z" }, ] +[[package]] +name = "boto3" +version = "1.42.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/87/1ed88eaa1e814841a37e71fee74c2b74341d14b791c0c6038b7ba914bea1/boto3-1.42.83.tar.gz", hash = "sha256:cc5621e603982cb3145b7f6c9970e02e297a1a0eb94637cc7f7b69d3017640ee", size = 112719, upload-time = "2026-04-03T19:34:21.254Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/b1/8a066bc8f02937d49783c0b3948ab951d8284e6fde436cab9f359dbd4d93/boto3-1.42.83-py3-none-any.whl", hash = "sha256:544846fdb10585bb7837e409868e8e04c6b372fa04479ba1597ce82cf1242076", size = 140555, upload-time = "2026-04-03T19:34:17.935Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.83" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/01/b46a3f8b6e9362258f78f1890db1a96d4ed73214d6a36420dc158dcfd221/botocore-1.42.83.tar.gz", hash = "sha256:34bc8cb64b17ac17f8901f073fe4fc9572a5cac9393a37b2b3ea372a83b87f4a", size = 15140337, upload-time = "2026-04-03T19:34:08.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/97/0d6f50822dc8c1df7f3eadb0bc6822fc0f98f02287c4efc7c7c88fde129a/botocore-1.42.83-py3-none-any.whl", hash = "sha256:ec0c3ecb3772936ed22a3bdda09883b34858933f71004686d460d829bab39d8e", size = 14818388, upload-time = "2026-04-03T19:34:03.333Z" }, +] + [[package]] name = "cachecontrol" version = "0.14.4" @@ -630,6 +658,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "license-expression" version = "30.4.4" @@ -848,6 +885,9 @@ version = "1.0.0" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, + { name = "boto3" }, + { name = "botocore" }, + { name = "click" }, { name = "httpx" }, { name = "pre-commit" }, { name = "pydantic" }, @@ -877,6 +917,9 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiohttp", specifier = ">=3.13.4" }, + { name = "boto3", specifier = ">=1.42.83" }, + { name = "botocore", specifier = ">=1.42.83" }, + { name = "click", specifier = ">=8.3.1" }, { name = "httpx", specifier = ">=0.27.0" }, { name = "pip-audit", marker = "extra == 'dev'", specifier = ">=2.0.0" }, { name = "pre-commit", specifier = ">=4.5.1" }, @@ -1314,6 +1357,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "python-discovery" version = "1.2.1" @@ -1456,6 +1511,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = 
"sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "shellingham" version = "1.5.4" @@ -1465,6 +1532,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = 
"sortedcontainers" version = "2.4.0"