diff --git a/TESTING.md b/TESTING.md index ccf64cfa0..bf4d0c291 100644 --- a/TESTING.md +++ b/TESTING.md @@ -291,7 +291,7 @@ class TestExampleService: def test_with_database(db_session): """Test using database session fixture.""" # db_session is automatically provided by conftest.py - from mcpgateway.models import Tool + from mcpgateway.common.models import Tool tool = Tool(name="test_tool") db_session.add(tool) db_session.commit() diff --git a/docs/docs/architecture/multitenancy.md b/docs/docs/architecture/multitenancy.md index 01389d295..f7083c266 100644 --- a/docs/docs/architecture/multitenancy.md +++ b/docs/docs/architecture/multitenancy.md @@ -652,7 +652,7 @@ For emergency password resets, you can update the database directly: python3 -c " from mcpgateway.services.argon2_service import Argon2PasswordService from mcpgateway.db import SessionLocal -from mcpgateway.models import EmailUser +from mcpgateway.common.models import EmailUser service = Argon2PasswordService() hashed = service.hash_password('new_password') diff --git a/mcp-servers/templates/go/copier.yaml b/mcp-servers/templates/go/copier.yaml index e6d615ea5..61d2cb223 100644 --- a/mcp-servers/templates/go/copier.yaml +++ b/mcp-servers/templates/go/copier.yaml @@ -45,4 +45,3 @@ include_container: type: bool help: Include Dockerfile for a minimal runtime image default: true - diff --git a/mcpgateway/admin.py b/mcpgateway/admin.py index a7c5571f3..d02fd920c 100644 --- a/mcpgateway/admin.py +++ b/mcpgateway/admin.py @@ -51,12 +51,12 @@ from starlette.datastructures import UploadFile as StarletteUploadFile # First-Party +from mcpgateway.common.models import LogLevel from mcpgateway.config import settings from mcpgateway.db import get_db, GlobalConfig, ObservabilitySavedQuery, ObservabilitySpan, ObservabilityTrace from mcpgateway.db import Tool as DbTool from mcpgateway.db import utc_now from mcpgateway.middleware.rbac import get_current_user_with_permissions, require_permission -from mcpgateway.models 
import LogLevel from mcpgateway.schemas import ( A2AAgentCreate, A2AAgentRead, diff --git a/mcpgateway/cache/session_registry.py b/mcpgateway/cache/session_registry.py index db26ed2c2..07468f8f6 100644 --- a/mcpgateway/cache/session_registry.py +++ b/mcpgateway/cache/session_registry.py @@ -65,9 +65,9 @@ # First-Party from mcpgateway import __version__ +from mcpgateway.common.models import Implementation, InitializeResult, ServerCapabilities from mcpgateway.config import settings from mcpgateway.db import get_db, SessionMessageRecord, SessionRecord -from mcpgateway.models import Implementation, InitializeResult, ServerCapabilities from mcpgateway.services import PromptService, ResourceService, ToolService from mcpgateway.services.logging_service import LoggingService from mcpgateway.transports import SSETransport diff --git a/mcpgateway/common/__init__.py b/mcpgateway/common/__init__.py new file mode 100644 index 000000000..2f4c65db1 --- /dev/null +++ b/mcpgateway/common/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +"""Location: ./mcpgateway/common/__init__.py +Copyright 2025 +SPDX-License-Identifier: Apache-2.0 +Authors: Teryl Taylor + +Common ContextForge package for shared classes and functions. +""" diff --git a/mcpgateway/common/config.py b/mcpgateway/common/config.py new file mode 100644 index 000000000..5ab271fb2 --- /dev/null +++ b/mcpgateway/common/config.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +"""Location: ./mcpgateway/common/config.py +Copyright 2025 +SPDX-License-Identifier: Apache-2.0 +Authors: Mihai Criveti, Manav Gupta + +Common MCP Gateway Configuration settings used across subpackages. +This module defines configuration settings for the MCP Gateway using Pydantic. +It loads configuration from environment variables with sensible defaults. 
+""" + +# Standard +from functools import lru_cache + +# Third-Party +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + """Validation settings for the security validator.""" + + # Validation patterns for safe display (configurable) + validation_dangerous_html_pattern: str = ( + r"<(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)\b|*(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)>" + ) + + validation_dangerous_js_pattern: str = r"(?i)(?:^|\s|[\"'`<>=])(javascript:|vbscript:|data:\s*[^,]*[;\s]*(javascript|vbscript)|\bon[a-z]+\s*=|<\s*script\b)" + + validation_allowed_url_schemes: list[str] = ["http://", "https://", "ws://", "wss://"] + + # Character validation patterns + validation_name_pattern: str = r"^[a-zA-Z0-9_.\-\s]+$" # Allow spaces for names + validation_identifier_pattern: str = r"^[a-zA-Z0-9_\-\.]+$" # No spaces for IDs + validation_safe_uri_pattern: str = r"^[a-zA-Z0-9_\-.:/?=&%]+$" + validation_unsafe_uri_pattern: str = r'[<>"\'\\]' + validation_tool_name_pattern: str = r"^[a-zA-Z][a-zA-Z0-9._-]*$" # MCP tool naming + validation_tool_method_pattern: str = r"^[a-zA-Z][a-zA-Z0-9_\./-]*$" + + # MCP-compliant size limits (configurable via env) + validation_max_name_length: int = 255 + validation_max_description_length: int = 8192 # 8KB + validation_max_template_length: int = 65536 # 64KB + validation_max_content_length: int = 1048576 # 1MB + validation_max_json_depth: int = 10 + validation_max_url_length: int = 2048 + validation_max_rpc_param_size: int = 262144 # 256KB + + validation_max_method_length: int = 128 + + # Allowed MIME types + validation_allowed_mime_types: list[str] = [ + "text/plain", + "text/html", + "text/css", + "text/markdown", + "text/javascript", + "application/json", + "application/xml", + "application/pdf", + "image/png", + 
"image/jpeg", + "image/gif", + "image/svg+xml", + "application/octet-stream", + ] + + # Rate limiting + validation_max_requests_per_minute: int = 60 + + # CLI settings + plugins_cli_markup_mode: str | None = None + plugins_cli_completion: bool = True + + +@lru_cache() +def get_settings() -> Settings: + """Get cached settings instance. + + Returns: + Settings: A cached instance of the Settings class. + + Examples: + >>> settings = get_settings() + >>> isinstance(settings, Settings) + True + >>> # Second call returns the same cached instance + >>> settings2 = get_settings() + >>> settings is settings2 + True + """ + # Instantiate a fresh Pydantic Settings object, + # loading from env vars or .env exactly once. + cfg = Settings() + # Validate that transport_type is correct; will + # raise if mis-configured. + # cfg.validate_transport() + # Ensure sqlite DB directories exist if needed. + # cfg.validate_database() + # Return the one-and-only Settings instance (cached). + return cfg + + +# Create settings instance +settings = get_settings() diff --git a/mcpgateway/models.py b/mcpgateway/common/models.py similarity index 98% rename from mcpgateway/models.py rename to mcpgateway/common/models.py index b868d9d18..f8704e917 100644 --- a/mcpgateway/models.py +++ b/mcpgateway/common/models.py @@ -16,7 +16,7 @@ - Capability definitions Examples: - >>> from mcpgateway.models import Role, LogLevel, TextContent + >>> from mcpgateway.common.models import Role, LogLevel, TextContent >>> Role.USER.value 'user' >>> Role.ASSISTANT.value @@ -1360,3 +1360,20 @@ class PermissionAudit(BaseModel): # Permission constants are imported from db.py to avoid duplication # Use Permissions class from mcpgateway.db instead of duplicate SystemPermissions + + +class TransportType(str, Enum): + """ + Enumeration of supported transport mechanisms for communication between components. + + Attributes: + SSE (str): Server-Sent Events transport. + HTTP (str): Standard HTTP-based transport. 
+ STDIO (str): Standard input/output transport. + STREAMABLEHTTP (str): HTTP transport with streaming. + """ + + SSE = "SSE" + HTTP = "HTTP" + STDIO = "STDIO" + STREAMABLEHTTP = "STREAMABLEHTTP" diff --git a/mcpgateway/common/validators.py b/mcpgateway/common/validators.py new file mode 100644 index 000000000..4e8f2fa11 --- /dev/null +++ b/mcpgateway/common/validators.py @@ -0,0 +1,1190 @@ +# -*- coding: utf-8 -*- +"""Location: ./mcpgateway/common/validators.py +Copyright 2025 +SPDX-License-Identifier: Apache-2.0 +Authors: Mihai Criveti, Madhav Kandukuri + +SecurityValidator for MCP Gateway +This module defines the `SecurityValidator` class, which provides centralized, configurable +validation logic for user-generated content in MCP-based applications. + +The validator enforces strict security and structural rules across common input types such as: +- Display text (e.g., names, descriptions) +- Identifiers and tool names +- URIs and URLs +- JSON object depth +- Templates (including limited HTML/Jinja2) +- MIME types + +Key Features: +- Pattern-based validation using settings-defined regex for HTML/script safety +- Configurable max lengths and depth limits +- Whitelist-based URL scheme and MIME type validation +- Safe escaping of user-visible text fields +- Reusable static/class methods for field-level and form-level validation + +Intended to be used with Pydantic or similar schema-driven systems to validate and sanitize +user input in a consistent, centralized way. 
+ +Dependencies: +- Standard Library: re, html, logging, urllib.parse +- First-party: `settings` from `mcpgateway.common.config` + +Example usage: + SecurityValidator.validate_name("my_tool", field_name="Tool Name") + SecurityValidator.validate_url("https://example.com") + SecurityValidator.validate_json_depth({...}) + +Examples: + >>> from mcpgateway.common.validators import SecurityValidator + >>> SecurityValidator.sanitize_display_text('<b>Test</b>', 'test') + '&lt;b&gt;Test&lt;/b&gt;' + >>> SecurityValidator.validate_name('valid_name-123', 'test') + 'valid_name-123' + >>> SecurityValidator.validate_identifier('my.test.id_123', 'test') + 'my.test.id_123' + >>> SecurityValidator.validate_json_depth({'a': {'b': 1}}) + >>> SecurityValidator.validate_json_depth({'a': 1}) +""" + +# Standard +import html +import logging +import re +from urllib.parse import urlparse +import uuid + +# First-Party +from mcpgateway.common.config import settings + +logger = logging.getLogger(__name__) + + +class SecurityValidator: + """Configurable validation with MCP-compliant limits""" + + # Configurable patterns (from settings) + DANGEROUS_HTML_PATTERN = ( + settings.validation_dangerous_html_pattern + ) # Default: '<(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)\b|*(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)>' + DANGEROUS_JS_PATTERN = settings.validation_dangerous_js_pattern # Default: javascript:|vbscript:|on\w+\s*=|data:.*script + ALLOWED_URL_SCHEMES = settings.validation_allowed_url_schemes # Default: ["http://", "https://", "ws://", "wss://"] + + # Character type patterns + NAME_PATTERN = settings.validation_name_pattern # Default: ^[a-zA-Z0-9_\-\s]+$ + IDENTIFIER_PATTERN = settings.validation_identifier_pattern # Default: ^[a-zA-Z0-9_\-\.]+$ + VALIDATION_SAFE_URI_PATTERN = settings.validation_safe_uri_pattern # 
Default: ^[a-zA-Z0-9_\-.:/?=&%]+$ + VALIDATION_UNSAFE_URI_PATTERN = settings.validation_unsafe_uri_pattern # Default: [<>"\'\\] + TOOL_NAME_PATTERN = settings.validation_tool_name_pattern # Default: ^[a-zA-Z][a-zA-Z0-9_-]*$ + + # MCP-compliant limits (configurable) + MAX_NAME_LENGTH = settings.validation_max_name_length # Default: 255 + MAX_DESCRIPTION_LENGTH = settings.validation_max_description_length # Default: 8192 (8KB) + MAX_TEMPLATE_LENGTH = settings.validation_max_template_length # Default: 65536 + MAX_CONTENT_LENGTH = settings.validation_max_content_length # Default: 1048576 (1MB) + MAX_JSON_DEPTH = settings.validation_max_json_depth # Default: 10 + MAX_URL_LENGTH = settings.validation_max_url_length # Default: 2048 + + @classmethod + def sanitize_display_text(cls, value: str, field_name: str) -> str: + """Ensure text is safe for display in UI by escaping special characters + + Args: + value (str): Value to validate + field_name (str): Name of field being validated + + Returns: + str: Value if acceptable + + Raises: + ValueError: When input is not acceptable + + Examples: + Basic HTML escaping: + + >>> SecurityValidator.sanitize_display_text('Hello World', 'test') + 'Hello World' + >>> SecurityValidator.sanitize_display_text('Hello World', 'test') + 'Hello <b>World</b>' + + Empty/None handling: + + >>> SecurityValidator.sanitize_display_text('', 'test') + '' + >>> SecurityValidator.sanitize_display_text(None, 'test') #doctest: +SKIP + + Dangerous script patterns: + + >>> SecurityValidator.sanitize_display_text('alert();', 'test') + 'alert();' + >>> SecurityValidator.sanitize_display_text('javascript:alert(1)', 'test') + Traceback (most recent call last): + ... + ValueError: test contains script patterns that may cause display issues + + Polyglot attack patterns: + + >>> SecurityValidator.sanitize_display_text('"; alert()', 'test') + Traceback (most recent call last): + ... 
+ ValueError: test contains potentially dangerous character sequences + >>> SecurityValidator.sanitize_display_text('-->test', 'test') + '-->test' + >>> SecurityValidator.sanitize_display_text('-->') + Traceback (most recent call last): + ... + ValueError: Template contains HTML tags that may interfere with proper display + >>> SecurityValidator.validate_template('Test ') + Traceback (most recent call last): + ... + ValueError: Template contains HTML tags that may interfere with proper display + >>> SecurityValidator.validate_template('
') + Traceback (most recent call last): + ... + ValueError: Template contains HTML tags that may interfere with proper display + + Event handlers blocked: + + >>> SecurityValidator.validate_template('