aniongithub · aniongithub · Sep 6, 2025 · Sep 5, 2025 · Sep 5, 2025 · Sep 6, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -22,7 +22,6 @@
 			"extensions": [
 				"ms-python.python",
 				"zaaack.markdown-editor",
-				"bierner.emojisense",
 				"ms-python.debugpy"
 			]
         }

diff --git a/.env b/.env
@@ -1,7 +1,5 @@
-DEFAULT_MODEL="qwen3:0.6b"
-DEFAULT_IMAGE_MODEL="qwen2.5vl:3b"
+DEFAULT_MODEL="qwen3:0.6b-q4_K_M"
 
 OLLAMA_URL="http://ollama:11434"
 DB_PATH="/data/enmemoryalpha_db"
-TEXT_COLLECTION_NAME="memoryalpha_text"
-IMAGE_COLLECTION_NAME="memoryalpha_images"
+TEXT_COLLECTION_NAME="memoryalpha_text"
diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
@@ -40,14 +40,15 @@ jobs:
 
     - name: Test ask endpoint
       run: |
-        # Test the synchronous ask endpoint with a simple query
-        response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")
-
+        # Test the ask endpoint with a simple query (-sS: no progress meter in the log, curl errors still shown)
+        response=$(curl -sS -X POST "http://localhost:8000/memoryalpha/rag/ask"   -H "Content-Type: application/json"   -d '{
+            "question": "What is the color of Vulcan blood?"
+          }')
         # Check if response contains expected content
-        if echo "$response" | grep -q "Enterprise"; then
+        if echo "$response" | grep -q "green"; then
           echo "✅ Ask endpoint test passed"
         else
-          echo "❌ Ask endpoint test failed - no relevant content found"
+          echo "❌ Ask endpoint test failed, answer did not contain expected content"
           echo "Response: $response"
           exit 1
         fi

diff --git a/.github/workflows/pr-check.yml b/.github/workflows/pr-check.yml
@@ -39,14 +39,15 @@ jobs:
 
     - name: Test ask endpoint
       run: |
-        # Test the synchronous ask endpoint with a simple query
-        response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")
-
+        # Test the ask endpoint with a simple query (-sS: no progress meter in the log, curl errors still shown)
+        response=$(curl -sS -X POST "http://localhost:8000/memoryalpha/rag/ask"   -H "Content-Type: application/json"   -d '{
+            "question": "What was the name of human who discovered warp drive?"
+          }')
         # Check if response contains expected content
-        if echo "$response" | grep -q "Enterprise"; then
+        if echo "$response" | grep -q "Zefram Cochrane"; then
           echo "✅ Ask endpoint test passed"
         else
-          echo "❌ Ask endpoint test failed - no relevant content found"
+          echo "❌ Ask endpoint test failed, answer did not contain expected content"
           echo "Response: $response"
           exit 1
         fi

diff --git a/api/main.py b/api/main.py
@@ -3,7 +3,6 @@
 from fastapi import FastAPI
 from .memoryalpha.health import router as health_router
 from .memoryalpha.ask import router as ask_router
-from .memoryalpha.identify import router as identify_router
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -21,5 +20,4 @@ async def lifespan(app: FastAPI):
 app = FastAPI(lifespan=lifespan)
 
 app.include_router(health_router)
-app.include_router(ask_router)
-app.include_router(identify_router)
+app.include_router(ask_router)
diff --git a/api/memoryalpha/ask.py b/api/memoryalpha/ask.py
@@ -1,29 +1,53 @@
-from fastapi import APIRouter, Query
+from fastapi import APIRouter, Query, Body
 from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from typing import Optional
 
-from .rag import MemoryAlphaRAG, ThinkingMode
+from .rag import MemoryAlphaRAG
 
 router = APIRouter()
 
 # Singleton or global instance for demo; in production, manage lifecycle properly
 rag_instance = MemoryAlphaRAG()
-ThinkingMode = ThinkingMode
+
+class AskRequest(BaseModel):  # JSON body schema for POST /memoryalpha/rag/ask
+    question: str  # the user question (required, no default)
+    max_tokens: Optional[int] = 2048  # maximum tokens to generate
+    top_k: Optional[int] = 10  # number of documents to retrieve
+    top_p: Optional[float] = 0.8  # sampling parameter
+    temperature: Optional[float] = 0.3  # randomness/creativity of output
+
+@router.post("/memoryalpha/rag/ask")
+def ask_endpoint_post(request: AskRequest):
+    """
+    Answer a question sent as a JSON body (see AskRequest) via the RAG pipeline.
+    Returns {"response": answer}; any exception becomes HTTP 500 {"error": str(e)}.
+    """
+    try:
+        answer = rag_instance.ask(
+            request.question, 
+            max_tokens=request.max_tokens,
+            top_k=request.top_k,
+            top_p=request.top_p,
+            temperature=request.temperature
+        )
+        return JSONResponse(content={"response": answer})
+    except Exception as e:  # broad on purpose: every failure is reported to the client as a 500
+        return JSONResponse(status_code=500, content={"error": str(e)})
 
 @router.get("/memoryalpha/rag/ask")
 def ask_endpoint(
     question: str = Query(..., description="The user question"),
-    thinkingmode: str = Query("DISABLED", description="Thinking mode: DISABLED, QUIET, or VERBOSE"),
     max_tokens: int = Query(2048, description="Maximum tokens to generate"),
     top_k: int = Query(10, description="Number of documents to retrieve"),
     top_p: float = Query(0.8, description="Sampling parameter"),
     temperature: float = Query(0.3, description="Randomness/creativity of output")
 ):
     """
-    Query the RAG pipeline and return the full response (including thinking if enabled).
+    Query the RAG pipeline and return the full response.
+    Now uses advanced tool-enabled RAG by default for better results.
     """
     try:
-        # Set the thinking mode for this request
-        rag_instance.thinking_mode = ThinkingMode[thinkingmode.upper()]
         answer = rag_instance.ask(
             question, 
             max_tokens=max_tokens,

diff --git a/api/memoryalpha/identify.py b/api/memoryalpha/identify.py