11import logging
2- from uuid import UUID , uuid4
3- from typing import List
42from pathlib import Path
3+ from uuid import UUID , uuid4
54
6- from fastapi import APIRouter , File , UploadFile , Query , HTTPException
5+ from fastapi import (
6+ APIRouter ,
7+ BackgroundTasks ,
8+ File ,
9+ Form ,
10+ HTTPException ,
11+ Query ,
12+ UploadFile ,
13+ )
714from fastapi import Path as FastPath
815
9- from app .crud import DocumentCrud , CollectionCrud
10- from app .models import Document , DocumentPublic , Message
11- from app .utils import APIResponse , load_description , get_openai_client
12- from app .api .deps import CurrentUser , SessionDep , CurrentUserOrgProject
16+ from app .api .deps import CurrentUserOrgProject , SessionDep
1317from app .core .cloud import get_cloud_storage
18+ from app .core .doctransform import service as transformation_service
19+ from app .core .doctransform .registry import (
20+ get_available_transformers ,
21+ get_file_format ,
22+ is_transformation_supported ,
23+ resolve_transformer ,
24+ )
25+ from app .crud import CollectionCrud , DocumentCrud
1426from app .crud .rag import OpenAIAssistantCrud
27+ from app .models import (
28+ Document ,
29+ DocumentPublic ,
30+ DocumentUploadResponse ,
31+ Message ,
32+ TransformationJobInfo ,
33+ )
34+ from app .utils import APIResponse , get_openai_client , load_description
35+
1536
1637logger = logging .getLogger (__name__ )
1738router = APIRouter (prefix = "/documents" , tags = ["documents" ])
2041@router .get (
2142 "/list" ,
2243 description = load_description ("documents/list.md" ),
23- response_model = APIResponse [List [DocumentPublic ]],
44+ response_model = APIResponse [list [DocumentPublic ]],
2445)
2546def list_docs (
2647 session : SessionDep ,
@@ -36,13 +57,53 @@ def list_docs(
3657@router .post (
3758 "/upload" ,
3859 description = load_description ("documents/upload.md" ),
39- response_model = APIResponse [DocumentPublic ],
60+ response_model = APIResponse [DocumentUploadResponse ],
4061)
41- def upload_doc (
62+ async def upload_doc (
4263 session : SessionDep ,
4364 current_user : CurrentUserOrgProject ,
65+ background_tasks : BackgroundTasks ,
4466 src : UploadFile = File (...),
67+ target_format : str
68+ | None = Form (
69+ None ,
70+ description = "Desired output format for the uploaded document (e.g., pdf, docx, txt). " ,
71+ ),
72+ transformer : str
73+ | None = Form (
74+ None , description = "Name of the transformer to apply when converting. "
75+ ),
4576):
77+ # Determine source file format
78+ try :
79+ source_format = get_file_format (src .filename )
80+ except ValueError as e :
81+ raise HTTPException (status_code = 400 , detail = str (e ))
82+
83+ # Validate that the requested transformation is supported
84+ if target_format :
85+ if not is_transformation_supported (source_format , target_format ):
86+ raise HTTPException (
87+ status_code = 400 ,
88+ detail = f"Transformation from { source_format } to { target_format } is not supported" ,
89+ )
90+
91+ # Resolve the transformer to use
92+ if not transformer :
93+ transformer = "default"
94+ try :
95+ actual_transformer = resolve_transformer (
96+ source_format , target_format , transformer
97+ )
98+ except ValueError as e :
99+ available_transformers = get_available_transformers (
100+ source_format , target_format
101+ )
102+ raise HTTPException (
103+ status_code = 400 ,
104+ detail = f"{ str (e )} . Available transformers: { list (available_transformers .keys ())} " ,
105+ )
106+
46107 storage = get_cloud_storage (session = session , project_id = current_user .project_id )
47108 document_id = uuid4 ()
48109
@@ -54,8 +115,38 @@ def upload_doc(
54115 fname = src .filename ,
55116 object_store_url = str (object_store_url ),
56117 )
57- data = crud .update (document )
58- return APIResponse .success_response (data )
118+ source_document = crud .update (document )
119+
120+ job_info : TransformationJobInfo | None = None
121+ if target_format and actual_transformer :
122+ job_id = transformation_service .start_job (
123+ db = session ,
124+ current_user = current_user ,
125+ source_document_id = source_document .id ,
126+ transformer_name = actual_transformer ,
127+ target_format = target_format ,
128+ background_tasks = background_tasks ,
129+ )
130+ job_info = TransformationJobInfo (
131+ message = f"Document accepted for transformation from { source_format } to { target_format } ." ,
132+ job_id = str (job_id ),
133+ source_format = source_format ,
134+ target_format = target_format ,
135+ transformer = actual_transformer ,
136+ status_check_url = f"/documents/transformations/{ job_id } " ,
137+ )
138+
139+ document_schema = DocumentPublic .model_validate (
140+ source_document , from_attributes = True
141+ )
142+ document_schema .signed_url = storage .get_signed_url (
143+ source_document .object_store_url
144+ )
145+ response = DocumentUploadResponse (
146+ ** document_schema .model_dump (), transformation_job = job_info
147+ )
148+
149+ return APIResponse .success_response (response )
59150
60151
61152@router .delete (
0 commit comments