Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"extensions": [
"ms-python.python",
"zaaack.markdown-editor",
"bierner.emojisense",
"ms-python.debugpy"
]
}
Expand Down
6 changes: 2 additions & 4 deletions .env
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
DEFAULT_MODEL="qwen3:0.6b"
DEFAULT_IMAGE_MODEL="qwen2.5vl:3b"
DEFAULT_MODEL="qwen3:0.6b-q4_K_M"

OLLAMA_URL="http://ollama:11434"
DB_PATH="/data/enmemoryalpha_db"
TEXT_COLLECTION_NAME="memoryalpha_text"
IMAGE_COLLECTION_NAME="memoryalpha_images"
TEXT_COLLECTION_NAME="memoryalpha_text"
11 changes: 6 additions & 5 deletions .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,15 @@ jobs:

- name: Test ask endpoint
run: |
# Test the synchronous ask endpoint with a simple query
response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")

# Test the ask endpoint with a simple query
response=$(curl -X POST "http://localhost:8000/memoryalpha/rag/ask" -H "Content-Type: application/json" -d '{
"question": "What is the color of Vulcan blood?"
}')
# Check if response contains expected content
if echo "$response" | grep -q "Enterprise"; then
if echo "$response" | grep -q "green"; then
echo "✅ Ask endpoint test passed"
else
echo "❌ Ask endpoint test failed - no relevant content found"
echo "❌ Ask endpoint test failed, answer did not contain expected content"
echo "Response: $response"
exit 1
fi
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/pr-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ jobs:

- name: Test ask endpoint
run: |
# Test the synchronous ask endpoint with a simple query
response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")

# Test the ask endpoint with a simple query
response=$(curl -X POST "http://localhost:8000/memoryalpha/rag/ask" -H "Content-Type: application/json" -d '{
"question": "What was the name of human who discovered warp drive?"
}')
# Check if response contains expected content
if echo "$response" | grep -q "Enterprise"; then
if echo "$response" | grep -q "Zefram Cochrane"; then
echo "✅ Ask endpoint test passed"
else
echo "❌ Ask endpoint test failed - no relevant content found"
echo "❌ Ask endpoint test failed, answer did not contain expected content"
echo "Response: $response"
exit 1
fi
Expand Down
4 changes: 1 addition & 3 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from fastapi import FastAPI
from .memoryalpha.health import router as health_router
from .memoryalpha.ask import router as ask_router
from .memoryalpha.identify import router as identify_router

# Configure logging
logging.basicConfig(level=logging.INFO)
Expand All @@ -21,5 +20,4 @@ async def lifespan(app: FastAPI):
app = FastAPI(lifespan=lifespan)

app.include_router(health_router)
app.include_router(ask_router)
app.include_router(identify_router)
app.include_router(ask_router)
38 changes: 31 additions & 7 deletions api/memoryalpha/ask.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,53 @@
from fastapi import APIRouter, Query
from fastapi import APIRouter, Query, Body
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional

from .rag import MemoryAlphaRAG, ThinkingMode
from .rag import MemoryAlphaRAG

router = APIRouter()

# Singleton or global instance for demo; in production, manage lifecycle properly
rag_instance = MemoryAlphaRAG()
ThinkingMode = ThinkingMode

class AskRequest(BaseModel):
    """JSON request body accepted by the POST ``/memoryalpha/rag/ask`` endpoint.

    Only ``question`` is required; every other field has a default and is
    declared ``Optional``, so an explicit ``null`` is also accepted for it.
    """

    # The user question to answer.
    question: str

    # Maximum tokens to generate.
    max_tokens: Optional[int] = 2048

    # Number of documents to retrieve.
    top_k: Optional[int] = 10

    # Sampling parameter.
    top_p: Optional[float] = 0.8

    # Randomness/creativity of output.
    temperature: Optional[float] = 0.3

@router.post("/memoryalpha/rag/ask")
def ask_endpoint_post(request: AskRequest):
    """
    Query the RAG pipeline and return the full response.

    Accepts POST requests with a JSON payload for cleaner API usage.

    Args:
        request: Parsed request body. ``question`` plus the generation
            settings (``max_tokens``, ``top_k``, ``top_p``, ``temperature``)
            are forwarded unchanged to ``rag_instance.ask``.

    Returns:
        JSONResponse: ``{"response": <answer>}`` on success, or a status-500
        response with ``{"error": <message>}`` when the pipeline raises.
    """
    # Function-scope import keeps this handler self-contained; the module's
    # top-level imports do not include logging.
    import logging

    try:
        answer = rag_instance.ask(
            request.question,
            max_tokens=request.max_tokens,
            top_k=request.top_k,
            top_p=request.top_p,
            temperature=request.temperature,
        )
        return JSONResponse(content={"response": answer})
    except Exception as e:
        # Top-level request boundary: record the full traceback server-side
        # before converting the failure into an error payload, so failures
        # are diagnosable from the logs and not only from the client response.
        logging.getLogger(__name__).exception("RAG ask request failed")
        # NOTE(review): str(e) may expose internal details to API clients;
        # kept as-is because existing callers may read the "error" field.
        return JSONResponse(status_code=500, content={"error": str(e)})

@router.get("/memoryalpha/rag/ask")
def ask_endpoint(
question: str = Query(..., description="The user question"),
thinkingmode: str = Query("DISABLED", description="Thinking mode: DISABLED, QUIET, or VERBOSE"),
max_tokens: int = Query(2048, description="Maximum tokens to generate"),
top_k: int = Query(10, description="Number of documents to retrieve"),
top_p: float = Query(0.8, description="Sampling parameter"),
temperature: float = Query(0.3, description="Randomness/creativity of output")
):
"""
Query the RAG pipeline and return the full response (including thinking if enabled).
Query the RAG pipeline and return the full response.
Now uses advanced tool-enabled RAG by default for better results.
"""
try:
# Set the thinking mode for this request
rag_instance.thinking_mode = ThinkingMode[thinkingmode.upper()]
answer = rag_instance.ask(
question,
max_tokens=max_tokens,
Expand Down
31 changes: 0 additions & 31 deletions api/memoryalpha/identify.py

This file was deleted.

Loading