diff --git a/app/main.py b/app/main.py index ed50b03..47308ae 100644 --- a/app/main.py +++ b/app/main.py @@ -96,30 +96,31 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: logger.info("Proxy Configuration: None") # Initialize database connections + # Always initialize master DB — admin routes are available in both modes + from app.services.master_db import master_db + + await master_db.init_db() + await master_db.verify_admin_exists() + logger.info("Master database initialized") + if settings.multi_tenant: - # Multi-tenant: only initialize master DB (tenant DBs are provisioned on demand) - from app.services.master_db import master_db from app.services.tenant_manager import tenant_connection_manager - await master_db.init_db() - await master_db.verify_admin_exists() tenant_connection_manager.start_cleanup_task() - logger.info("Master database initialized") else: - # Single-tenant: connect to the default tenant database + # Single-tenant: also connect to the default tenant database await init_db() yield # Shutdown await close_db() + await master_db.close() if settings.multi_tenant: - from app.services.master_db import master_db from app.services.tenant_manager import tenant_connection_manager await tenant_connection_manager.close_all() - await master_db.close() logger.info("Milestone API shutdown complete") diff --git a/app/services/master_db.py b/app/services/master_db.py index 81d8066..14845a5 100644 --- a/app/services/master_db.py +++ b/app/services/master_db.py @@ -10,6 +10,7 @@ This is separate from tenant databases. """ +import asyncio import logging from collections.abc import AsyncGenerator from contextlib import asynccontextmanager @@ -72,13 +73,39 @@ async def init_db(self): Verifies connectivity and ensures the schema is up to date (e.g., organizations table and related tenant columns). + + Makes up to 5 connection attempts, with exponential backoff between + them, if PostgreSQL is not yet ready (e.g., container still starting). 
""" if self._initialized: return - # Verify we can connect - async with self.engine.connect() as conn: - await conn.execute(text("SELECT 1")) + max_retries = 5 + for attempt in range(1, max_retries + 1): + try: + # Verify we can connect + async with self.engine.connect() as conn: + await conn.execute(text("SELECT 1")) + logger.info("Master DB: Connection verified (attempt %d)", attempt) + break + except Exception as e: + if attempt < max_retries: + wait = 2**attempt # 2, 4, 8, 16, 32 seconds + logger.warning( + "Master DB: Connection failed (attempt %d/%d): %s. Retrying in %ds...", + attempt, + max_retries, + e, + wait, + ) + await asyncio.sleep(wait) + else: + logger.error( + "Master DB: Connection failed after %d attempts: %s", + max_retries, + e, + ) + raise # Apply any missing schema migrations await self._apply_pending_migrations() @@ -93,48 +120,56 @@ async def _apply_pending_migrations(self): the organizations feature was added. It checks for missing tables and columns and applies them idempotently. - Errors are caught and logged so the app can still start even if - migrations fail (e.g., due to insufficient DB permissions). + Critical tables (admin_users, admin_sessions) must succeed or the + app will not start. Optional migrations (organizations, SSO) are + caught and logged so the app can still start if they fail. 
""" - try: - async with self.engine.begin() as conn: - # Ensure uuid-ossp extension exists - await conn.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')) - - # Ensure admin_users table exists (needed for login) - await conn.execute( - text( - "CREATE TABLE IF NOT EXISTS admin_users (" - " id SERIAL PRIMARY KEY," - " email VARCHAR(255) NOT NULL UNIQUE," - " password_hash TEXT NOT NULL," - " name VARCHAR(255)," - " role VARCHAR(20) DEFAULT 'admin'," - " active INTEGER DEFAULT 1," - " must_change_password INTEGER DEFAULT 0," - " created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP," - " last_login TIMESTAMP" - ")" - ) + # --- Critical tables: admin_users and admin_sessions --- + # These are required for the app to function. Errors here are fatal. + async with self.engine.begin() as conn: + # Ensure uuid-ossp extension exists + await conn.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')) + + # Ensure admin_users table exists (needed for login) + await conn.execute( + text( + "CREATE TABLE IF NOT EXISTS admin_users (" + " id SERIAL PRIMARY KEY," + " email VARCHAR(255) NOT NULL UNIQUE," + " password_hash TEXT NOT NULL," + " name VARCHAR(255)," + " role VARCHAR(20) DEFAULT 'admin'," + " active INTEGER DEFAULT 1," + " must_change_password INTEGER DEFAULT 0," + " created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + " last_login TIMESTAMP" + ")" ) + ) - # Ensure admin_sessions table exists (needed for login) - await conn.execute( - text( - "CREATE TABLE IF NOT EXISTS admin_sessions (" - " sid TEXT PRIMARY KEY," - " sess TEXT NOT NULL," - " expired BIGINT NOT NULL" - ")" - ) + # Ensure admin_sessions table exists (needed for login) + await conn.execute( + text( + "CREATE TABLE IF NOT EXISTS admin_sessions (" + " sid TEXT PRIMARY KEY," + " sess TEXT NOT NULL," + " expired BIGINT NOT NULL" + ")" ) - await conn.execute( - text( - "CREATE INDEX IF NOT EXISTS idx_admin_sessions_expired " - "ON admin_sessions(expired)" - ) + ) + await conn.execute( + text( + "CREATE 
INDEX IF NOT EXISTS idx_admin_sessions_expired " + "ON admin_sessions(expired)" ) + ) + + logger.info("Master DB: Core tables verified (admin_users, admin_sessions)") + # --- Optional migrations: organizations, SSO, tenant columns --- + # These can fail without preventing the app from starting. + try: + async with self.engine.begin() as conn: # Check if organizations table exists result = await conn.execute( text( @@ -286,7 +321,7 @@ async def _apply_pending_migrations(self): logger.info("Master DB: must_change_password column added") except Exception as e: - logger.warning("Auto-migration failed: %s", e) + logger.warning("Auto-migration for optional tables failed: %s", e) logger.warning("The app will continue, but organization features may not work.") logger.warning( "Run manually: python migrations/run_migration_master.py add_organizations" diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 7b1c01c..7140383 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -10,31 +10,35 @@ set -e wait_for_db() { local host="$1" local port="$2" - local max_attempts=15 + local label="$3" + local max_attempts="${DB_WAIT_ATTEMPTS:-30}" + local error local attempt=1 - echo "Waiting for PostgreSQL at ${host}:${port}..." + echo "Waiting for PostgreSQL ${label} at ${host}:${port} (max ${max_attempts} attempts)..." while [ $attempt -le $max_attempts ]; do - if python -c " -import socket + # Probe with a Python socket inside the if-condition (a bare failing assignment would abort the script under set -e); keep the error text for diagnostics + if error=$(python -c " +import socket, sys s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(3) try: s.connect(('${host}', ${port})) s.close() - exit(0) -except: - exit(1) -" 2>/dev/null; then - echo "PostgreSQL is ready at ${host}:${port}" + sys.exit(0) +except Exception as e: + print(str(e), file=sys.stderr) + sys.exit(1) +" 2>&1); then + echo "PostgreSQL ${label} is ready at ${host}:${port}" return 0 fi - echo " Attempt ${attempt}/${max_attempts} - waiting..." 
+ echo " Attempt ${attempt}/${max_attempts} - ${error:-connection failed}" sleep 2 attempt=$((attempt + 1)) done - echo "WARNING: PostgreSQL not ready after ${max_attempts} attempts, starting app anyway..." + echo "WARNING: PostgreSQL ${label} not ready after ${max_attempts} attempts, starting app anyway..." return 0 } @@ -42,8 +46,14 @@ except: DB_HOST="${DB_HOST:-localhost}" DB_PORT="${DB_PORT:-5432}" -# Wait for the database (non-fatal: app starts regardless) -wait_for_db "$DB_HOST" "$DB_PORT" +# Wait for the main (tenant) database +wait_for_db "$DB_HOST" "$DB_PORT" "(tenant DB)" + +# The master DB is initialized in both modes, so also wait for it when it is on a different host or port +if [ -n "${MASTER_DB_HOST}" ] && [ "${MASTER_DB_HOST}:${MASTER_DB_PORT:-5432}" != "${DB_HOST}:${DB_PORT}" ]; then + MASTER_DB_PORT="${MASTER_DB_PORT:-5432}" + wait_for_db "$MASTER_DB_HOST" "$MASTER_DB_PORT" "(master DB)" +fi # Run auto-initialization if AUTO_INIT_DB is set if [ "${AUTO_INIT_DB}" = "true" ]; then