Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,6 @@ FROM ghcr.io/astral-sh/uv:python3.13-alpine
RUN apk --no-cache add curl git
WORKDIR /app

# Clone KBUtilLib (required external dependency)
# This creates /app/lib/KBUtilLib/ which is referenced by app/utils/workspace.py
RUN mkdir -p lib && \
cd lib && \
git clone https://github.com/cshenry/KBUtilLib.git && \
cd ..

# Add KBUtilLib to PYTHONPATH so it can be imported
ENV PYTHONPATH=/app/lib/KBUtilLib/src:${PYTHONPATH}

# Copy application code and dependencies
COPY app ./app
COPY pyproject.toml /app/pyproject.toml
Expand Down
2 changes: 2 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@

from contextlib import asynccontextmanager
from app.utils.cache import cleanup_old_caches
from app.services.logger import setup_logging

@asynccontextmanager
async def lifespan(app: FastAPI):
# Startup: Clean up old caches and uploads
try:
setup_logging() # Configure in-memory logging
cleanup_result = cleanup_old_caches(Path(settings.CACHE_DIR))
# Use print or logger (logger is better if configured, but print works for startup)
import logging
Expand Down
56 changes: 56 additions & 0 deletions app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from uuid import uuid4
from fastapi import APIRouter, HTTPException, Header, Query, Cookie, Path, UploadFile, File
from app.exceptions import InvalidFilterError
from pydantic import BaseModel
from typing import List, Dict, Any, Optional
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import of 'Dict' is not used.
Import of 'Any' is not used.
Import of 'Optional' is not used.

Suggested change
from typing import List, Dict, Any, Optional
from typing import List

Copilot uses AI. Check for mistakes.

from app.models import (
TableDataRequest,
Expand Down Expand Up @@ -63,6 +65,7 @@
)
from app.utils.async_utils import run_sync_in_thread
from app.utils.request_utils import TableRequestProcessor
from app.services.logger import get_memory_handler
from app.config import settings
from app.config_constants import MAX_LIMIT, DEFAULT_LIMIT
from app.utils.cache import load_cache_metadata, save_cache_metadata
Expand Down Expand Up @@ -219,6 +222,59 @@ async def health_check():
raise HTTPException(status_code=500, detail=str(e))


# =============================================================================
# SYSTEM ENDPOINTS
# =============================================================================

class LogEntry(BaseModel):
    """Response schema for one in-memory log record as produced by MemoryLogHandler.emit()."""
    # ISO-8601 timestamp of when the record was emitted.
    timestamp: str
    # Lowercased logging level name, e.g. "info" or "error".
    level: str
    # Formatted log message text.
    message: str
    # Origin tag; the backend handler always sets "backend".
    source: str
    # Name of the stdlib logger that emitted the record.
    logger: str

# Accepted level names mapped to stdlib logging severity values.
# "warn" is an alias for "warning" so both spellings work for clients.
_LOG_LEVEL_VALUES = {
    "debug": 10, "info": 20, "warning": 30, "warn": 30, "error": 40, "critical": 50,
}

@router.get(
    "/system/logs",
    response_model=List[LogEntry],
    tags=["General"],
    summary="Get recent system logs",
)
async def get_system_logs(
    limit: int = Query(100, ge=1, le=1000, description="Number of logs to return"),
    level: str | None = Query(None, description="Minimum log level (debug, info, warn, error)"),
    authorization: str | None = Header(None, description="KBase authentication token")
):
    """
    Retrieve recent system logs from the in-memory buffer, newest first.

    When ``level`` is given, only entries at or above that severity are
    returned; an unrecognized level name is rejected with a 400 instead of
    silently disabling the filter.

    NOTE(review): ``authorization`` is accepted but never validated, so this
    endpoint is effectively public. Confirm the buffered logs cannot leak
    sensitive details (stack traces, paths) before exposing it beyond a
    trusted network.
    """
    handler = get_memory_handler()
    # list() snapshots the deque so concurrent emits don't mutate it mid-sort.
    logs = list(handler.log_buffer)

    # ISO-8601 timestamps sort chronologically as strings: newest first.
    logs.sort(key=lambda entry: entry.get("timestamp", ""), reverse=True)

    if level:
        min_level = _LOG_LEVEL_VALUES.get(level.lower())
        if min_level is None:
            # Previously an unknown level fell through to min_level=0 and
            # returned everything; fail loudly so clients notice the typo.
            raise HTTPException(status_code=400, detail=f"Unknown log level: {level!r}")
        # Entries whose stored level is unrecognized are treated as INFO (20).
        logs = [
            entry for entry in logs
            if _LOG_LEVEL_VALUES.get(entry.get("level", "info").lower(), 20) >= min_level
        ]

    return logs[:limit]


Comment on lines +236 to +277
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The /system/logs endpoint is publicly accessible without authentication checks. Although authorization is accepted as a parameter (line 245), it's never validated. This exposes internal system logs to anyone who can access the endpoint, which could leak sensitive information like error messages, stack traces, database paths, or internal system details.

Copilot uses AI. Check for mistakes.

# =============================================================================
# FILE UPLOAD ENDPOINTS
Expand Down
65 changes: 65 additions & 0 deletions app/services/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import logging
import collections
from datetime import datetime
from typing import List, Dict, Any

# Maximum number of logs to keep in memory
MAX_LOG_ENTRIES = 1000

class MemoryLogHandler(logging.Handler):
"""
Custom logging handler that stores log records in memory.
Useful for exposing logs via API.
"""
def __init__(self, capacity=MAX_LOG_ENTRIES):
super().__init__()
self.log_buffer = collections.deque(maxlen=capacity)

def emit(self, record):
try:
log_entry = self.format(record)
self.log_buffer.append({
"timestamp": datetime.fromtimestamp(record.created).isoformat(),
"level": record.levelname.lower(),
"message": log_entry,
"source": "backend",
"logger": record.name
})
except Exception as e:
# Log the error to stderr so it's visible during development/debugging
import sys
print(f"MemoryLogHandler.emit() failed: {e}", file=sys.stderr)
self.handleError(record)

def get_logs(self, limit: int = 100, level: str = None) -> List[Dict[str, Any]]:
"""
Retrieve logs from buffer with optional filtering.
"""
logs = list(self.log_buffer)

if level:
# Normalize level; actual filtering can be implemented here later.
level = level.lower()
# TODO: Implement level-based filtering (e.g., min level severity)
# For now, return all logs and let clients filter

# Return most recent first
return sorted(logs, key=lambda x: x['timestamp'], reverse=True)[:limit]
Comment on lines +34 to +47
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The MemoryLogHandler uses a deque (line 16) which is thread-safe for append operations, but the get_logs method creates a list copy and sorts it (lines 35, 45-46) without synchronization. If logs are being added while get_logs is executing, the list copy could be inconsistent or miss recent entries. Consider adding a lock to ensure thread-safe reads, especially since FastAPI handles requests concurrently.

Copilot uses AI. Check for mistakes.

# Global instance shared by setup_logging() and the /system/logs route.
memory_handler = MemoryLogHandler()
# Only the bare message goes through the formatter; the structured fields
# (timestamp, level, logger) are captured separately in emit().
memory_handler.setFormatter(logging.Formatter("%(message)s"))

def get_memory_handler() -> MemoryLogHandler:
    """Return the process-wide in-memory log handler instance."""
    return memory_handler

def setup_logging() -> None:
    """
    Attach the shared MemoryLogHandler to the root logger (idempotent).

    Safe to call repeatedly (e.g. across dev-server reloads): the handler
    is only added if no MemoryLogHandler is already attached, so events
    are never captured twice.
    """
    root_logger = logging.getLogger()
    # Add memory handler if not already present
    if not any(isinstance(h, MemoryLogHandler) for h in root_logger.handlers):
        root_logger.addHandler(memory_handler)
    # Ensure INFO-and-above records reach the handler, but never *raise*
    # the threshold over an existing more-verbose (DEBUG) configuration —
    # the previous unconditional setLevel(INFO) could suppress records.
    if root_logger.level == logging.NOTSET or root_logger.level > logging.INFO:
        root_logger.setLevel(logging.INFO)
2 changes: 1 addition & 1 deletion app/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Utils module for TableScanner.

Contains business logic for:
- KBase Workspace API interactions via KBUtilLib
- KBase Workspace API interactions via direct HTTP requests (using requests library)
- Blobstore/Shock downloading
- Local file caching with age-based expiration
- SQLite database querying with filtering/sorting/pagination
Expand Down
Loading
Loading