diff --git a/.github/workflows/build-backend.yml b/.github/workflows/build-backend.yml index 9843e96..39a5fff 100644 --- a/.github/workflows/build-backend.yml +++ b/.github/workflows/build-backend.yml @@ -25,7 +25,7 @@ jobs: run: | # Start container in background docker run -d --name spendoo-test \ - -e GROQ_API_KEY=${{ secrets.GROQ_API_KEY }} -e SPENDOO_DEPLOY=${{ secrets.SPENDOO_DEPLOY }} -e SPENDOO_ALLOWED_IP=${{ secrets.SPENDOO_ALLOWED_IP }} -p 8000:8000 spendoo-ai-backend + -e GROQ_API_KEY=${{ secrets.GROQ_API_KEY }} -e MISTRAL_API_KEY=${{ secrets.MISTRAL_API_KEY }} -e SPENDOO_DEPLOY=${{ secrets.SPENDOO_DEPLOY }} -e SPENDOO_ALLOWED_IP=${{ secrets.SPENDOO_ALLOWED_IP }} -p 8000:8000 spendoo-ai-backend # Wait for FastAPI to start sleep 10 # Check health endpoint diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 7a67076..203d828 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -30,5 +30,5 @@ jobs: docker stop spendoo-ai-container || true docker rm spendoo-ai-container || true docker run -d --restart=always --name spendoo-ai-container -p 8000:8000 \ - -e GROQ_API_KEY="${{ secrets.GROQ_API_KEY }}" -e SPENDOO_DEPLOY="${{ secrets.SPENDOO_DEPLOY }}" -e SPENDOO_ALLOWED_IP="${{ secrets.SPENDOO_ALLOWED_IP }}" josephsameh/spendoo-ai-backend:latest + -e GROQ_API_KEY="${{ secrets.GROQ_API_KEY }}" -e MISTRAL_API_KEY="${{ secrets.MISTRAL_API_KEY }}" -e SPENDOO_DEPLOY="${{ secrets.SPENDOO_DEPLOY }}" -e SPENDOO_ALLOWED_IP="${{ secrets.SPENDOO_ALLOWED_IP }}" josephsameh/spendoo-ai-backend:latest docker image prune -f \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 134f472..f246f9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,7 @@ pytest>=7.0 openai>=1.0,<2 fastapi>=0.68,<1 uvicorn>=0.15,<1 -python-dotenv>=0.19,<1 \ No newline at end of file +mistralai==1.12.4 +python-dotenv>=0.19,<1 +groq>=0.1,<1 +python-multipart>=0.0.5,<1 \ No newline at end of file diff --git 
a/spendoo/categorization/service.py b/spendoo/categorization/service.py index 5c5fc33..cfa2a1c 100644 --- a/spendoo/categorization/service.py +++ b/spendoo/categorization/service.py @@ -15,7 +15,7 @@ class CategorizationService: def __init__(self): self.llm = LLMClient() - self.model_name = "openai/gpt-oss-20b" + self.model_name = "openai/gpt-oss-120b" def extract(self, text: str): @@ -24,19 +24,17 @@ def extract(self, text: str): Extract all transaction items from the text below. - For each item return: + For each item try to return: - id (incremental starting from 1 in order of appearance) - item_name - - quantity (if not mentioned assume 1) - - unit_price - - total_price (quantity × unit_price) + - price - category_id (choose one id from the list below) Available categories: {category_block} - Also calculate grand_total (sum of total_price). + Also calculate grand_total or extract it if it's explicitly mentioned in the text. If you can't find a clear grand total, sum up the item prices. If no suitable category exists return null. 
class ReceiptPipeline:
    """Turn raw receipt image bytes into structured data.

    The pipeline chains two collaborators: an ``OCRService`` that converts a
    base64-encoded image into text, and a ``CategorizationService`` whose
    ``extract`` method converts that text into the final result.
    """

    def __init__(self):
        """Instantiate the OCR service and the extraction service."""
        self.ocr = OCRService()
        self.extractor = CategorizationService()

    def process_receipt(self, image_bytes):
        """Run OCR on *image_bytes*, then extract structured data from the text.

        Args:
            image_bytes: Raw bytes of the receipt image.

        Returns:
            Whatever ``self.extractor.extract`` returns for the OCR text.
        """
        # The OCR service takes the image as a UTF-8 base64 string.
        encoded = base64.b64encode(image_bytes)
        ocr_text = self.ocr.extract_text(encoded.decode("utf-8"))

        # Hand the recognised text to the LLM-backed extractor.
        return self.extractor.extract(ocr_text)
class OCRService:
    """Thin wrapper around the Mistral OCR client used to read receipt images."""

    def __init__(self):
        """Create the Mistral client from ``settings.MISTRAL_API_KEY``."""
        self.client = Mistral(api_key=settings.MISTRAL_API_KEY)

    def extract_text(self, base64_image, mime_type="image/jpeg"):
        """Run OCR on a base64-encoded image and return its text as markdown.

        Args:
            base64_image: The image content as a base64 string (no data-URL
                prefix; the prefix is added here).
            mime_type: MIME type written into the data URL. Defaults to
                ``"image/jpeg"``, the value that was previously hard-coded,
                so existing callers behave exactly as before.

        Returns:
            One string in which every page of the OCR response is rendered
            as ``"### Page <n>"`` followed by that page's markdown, with
            pages separated by a blank line. An empty string when the
            response contains no pages.
        """
        response = self.client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": f"data:{mime_type};base64,{base64_image}",
            },
        )

        # Page headings are 1-based.
        return "\n\n".join(
            f"### Page {number}\n{page.markdown}"
            for number, page in enumerate(response.pages, start=1)
        )