Skip to content

Commit f24f4c5

Browse files
authored
Refactor Document Module and Cloud Storage to use project-based organization and add signed URL support (#346)
* Add project id in model and use project id for CRUD
* Add support for signed URLs
* Add storage path in project table
* Fix collection endpoint
* Fix API route test cases for document module
* Fix CRUD unit test for document module
* Fix unit test for collections
* Use project id instead of project model in document store utils
* Fix migration
* Run pre-commit
* Introduce get_cloud_storage method
* Fix test case
* Fix message
* Implement abstract method in CloudStorage class
* Use POSIX separators and reject absolute file_path
1 parent 5579c37 commit f24f4c5

27 files changed

+439
-193
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""add storage_path to project and project_id to document table
2+
3+
Revision ID: 40307ab77e9f
4+
Revises: 8725df286943
5+
Create Date: 2025-08-28 10:54:30.712627
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "40307ab77e9f"
14+
down_revision = "8725df286943"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Move documents from user ownership to project ownership.

    Steps, in order:

    1. Add ``project.storage_path`` (UUID), backfill every row with a fresh
       UUID, then make it NOT NULL and unique.
    2. Add ``document.project_id`` and ``document.is_deleted``; backfill
       ``is_deleted`` from ``deleted_at`` and ``project_id`` from the
       ``apikey`` row of the document's owner.
    3. Drop the ``owner_id`` foreign key and column, and add a cascading
       foreign key from ``document.project_id`` to ``project.id``.

    Raises:
        RuntimeError: if any document cannot be mapped to a project (its
            owner has no ``apikey`` row). Without this check the subsequent
            ``SET NOT NULL`` would fail with a less helpful database error.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    # Added as nullable first so existing rows can be backfilled before the
    # NOT NULL / unique constraints are applied.
    op.add_column("project", sa.Column("storage_path", sa.Uuid(), nullable=True))

    conn = op.get_bind()
    # NOTE(review): gen_random_uuid() is a PostgreSQL function (core since
    # v13, pgcrypto before that) - confirm the target server provides it.
    conn.execute(sa.text("UPDATE project SET storage_path = gen_random_uuid()"))

    op.alter_column("project", "storage_path", nullable=False)
    op.create_unique_constraint("uq_project_storage_path", "project", ["storage_path"])

    op.add_column("document", sa.Column("project_id", sa.Integer(), nullable=True))
    op.add_column("document", sa.Column("is_deleted", sa.Boolean(), nullable=True))

    # A document counts as deleted exactly when it already has a deleted_at
    # timestamp.
    conn.execute(
        sa.text(
            """
            UPDATE document
            SET is_deleted = CASE
                WHEN deleted_at IS NULL THEN false
                ELSE true
            END
            """
        )
    )
    # Map each document to a project through its owner's API key. LIMIT 1
    # without ORDER BY means an owner with keys in several projects gets an
    # arbitrary one of them.
    conn.execute(
        sa.text(
            """
            UPDATE document
            SET project_id = (
                SELECT project_id FROM apikey
                WHERE apikey.user_id = document.owner_id
                LIMIT 1
            )
            """
        )
    )

    # Fail early and clearly if the backfill left gaps; the NOT NULL change
    # below would otherwise abort with a generic constraint violation.
    unmapped = conn.execute(
        sa.text("SELECT COUNT(*) FROM document WHERE project_id IS NULL")
    ).scalar()
    if unmapped:
        raise RuntimeError(
            f"Cannot migrate: {unmapped} document(s) have an owner with no "
            "apikey row, so no project_id could be derived. Assign or remove "
            "these documents before re-running the migration."
        )

    op.alter_column("document", "is_deleted", nullable=False)
    op.alter_column("document", "project_id", nullable=False)

    op.drop_constraint("document_owner_id_fkey", "document", type_="foreignkey")
    # Named explicitly (rather than None) because downgrade() drops this
    # constraint by exactly this name.
    op.create_foreign_key(
        "document_project_id_fkey",
        "document",
        "project",
        ["project_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_column("document", "owner_id")

    # ### end Alembic commands ###
67+
68+
69+
def downgrade():
    """Revert documents to user ownership and drop the project storage path.

    Steps, in order:

    1. Drop the unique constraint and the ``project.storage_path`` column.
    2. Re-add ``document.owner_id`` and backfill it from the ``apikey`` row
       that belongs to the document's project.
    3. Swap the ``project_id`` foreign key back to a cascading
       ``owner_id`` -> ``user.id`` foreign key, then drop
       ``document.is_deleted`` and ``document.project_id``.

    Raises:
        RuntimeError: if any document cannot be mapped back to a user (its
            project has no ``apikey`` row). Without this check the subsequent
            ``SET NOT NULL`` would fail with a less helpful database error.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint("uq_project_storage_path", "project", type_="unique")
    op.drop_column("project", "storage_path")

    # Nullable until the backfill below has populated every row.
    op.add_column(
        "document",
        sa.Column("owner_id", sa.Integer(), autoincrement=False, nullable=True),
    )

    conn = op.get_bind()
    # Backfill owner_id from project_id using the apikey mapping. LIMIT 1
    # without ORDER BY picks an arbitrary user when a project has several
    # keys, so the original owner is not necessarily restored.
    conn.execute(
        sa.text(
            """
            UPDATE document d
            SET owner_id = (
                SELECT user_id
                FROM apikey a
                WHERE a.project_id = d.project_id
                LIMIT 1
            )
            """
        )
    )

    # Fail early and clearly if the backfill left gaps; the NOT NULL change
    # below would otherwise abort with a generic constraint violation.
    unmapped = conn.execute(
        sa.text("SELECT COUNT(*) FROM document WHERE owner_id IS NULL")
    ).scalar()
    if unmapped:
        raise RuntimeError(
            f"Cannot downgrade: {unmapped} document(s) belong to a project "
            "with no apikey row, so no owner_id could be derived. Assign or "
            "remove these documents before re-running the downgrade."
        )

    op.alter_column("document", "owner_id", nullable=False)

    op.drop_constraint("document_project_id_fkey", "document", type_="foreignkey")
    op.create_foreign_key(
        "document_owner_id_fkey",
        "document",
        "user",
        ["owner_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_column("document", "is_deleted")
    op.drop_column("document", "project_id")
    # ### end Alembic commands ###

backend/app/api/routes/collections.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from sqlalchemy.exc import SQLAlchemyError
1616

1717
from app.api.deps import CurrentUser, SessionDep, CurrentUserOrgProject
18-
from app.core.cloud import AmazonCloudStorage
18+
from app.core.cloud import get_cloud_storage
1919
from app.api.routes.responses import handle_openai_error
2020
from app.core.util import now, post_callback
2121
from app.crud import (
@@ -24,7 +24,7 @@
2424
DocumentCollectionCrud,
2525
)
2626
from app.crud.rag import OpenAIVectorStoreCrud, OpenAIAssistantCrud
27-
from app.models import Collection, Document
27+
from app.models import Collection, Document, DocumentPublic
2828
from app.models.collection import CollectionStatus
2929
from app.utils import APIResponse, load_description, get_openai_client
3030

@@ -225,8 +225,8 @@ def do_create_collection(
225225
else WebHookCallback(request.callback_url, payload)
226226
)
227227

228-
storage = AmazonCloudStorage(current_user)
229-
document_crud = DocumentCrud(session, current_user.id)
228+
storage = get_cloud_storage(session=session, project_id=current_user.project_id)
229+
document_crud = DocumentCrud(session, current_user.project_id)
230230
assistant_crud = OpenAIAssistantCrud(client)
231231
vector_store_crud = OpenAIVectorStoreCrud(client)
232232
collection_crud = CollectionCrud(session, current_user.id)
@@ -423,7 +423,7 @@ def list_collections(
423423
@router.post(
424424
"/docs/{collection_id}",
425425
description=load_description("collections/docs.md"),
426-
response_model=APIResponse[List[Document]],
426+
response_model=APIResponse[List[DocumentPublic]],
427427
)
428428
def collection_documents(
429429
session: SessionDep,

backend/app/api/routes/documents.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
from typing import List
44
from pathlib import Path
55

6-
from fastapi import APIRouter, File, UploadFile, Query
6+
from fastapi import APIRouter, File, UploadFile, Query, HTTPException
77
from fastapi import Path as FastPath
88

99
from app.crud import DocumentCrud, CollectionCrud
10-
from app.models import Document
10+
from app.models import Document, DocumentPublic, Message
1111
from app.utils import APIResponse, load_description, get_openai_client
1212
from app.api.deps import CurrentUser, SessionDep, CurrentUserOrgProject
13-
from app.core.cloud import AmazonCloudStorage
13+
from app.core.cloud import get_cloud_storage
1414
from app.crud.rag import OpenAIAssistantCrud
1515

1616
logger = logging.getLogger(__name__)
@@ -20,35 +20,35 @@
2020
@router.get(
2121
"/list",
2222
description=load_description("documents/list.md"),
23-
response_model=APIResponse[List[Document]],
23+
response_model=APIResponse[List[DocumentPublic]],
2424
)
2525
def list_docs(
2626
session: SessionDep,
27-
current_user: CurrentUser,
27+
current_user: CurrentUserOrgProject,
2828
skip: int = Query(0, ge=0),
2929
limit: int = Query(100, gt=0, le=100),
3030
):
31-
crud = DocumentCrud(session, current_user.id)
31+
crud = DocumentCrud(session, current_user.project_id)
3232
data = crud.read_many(skip, limit)
3333
return APIResponse.success_response(data)
3434

3535

3636
@router.post(
3737
"/upload",
3838
description=load_description("documents/upload.md"),
39-
response_model=APIResponse[Document],
39+
response_model=APIResponse[DocumentPublic],
4040
)
4141
def upload_doc(
4242
session: SessionDep,
43-
current_user: CurrentUser,
43+
current_user: CurrentUserOrgProject,
4444
src: UploadFile = File(...),
4545
):
46-
storage = AmazonCloudStorage(current_user)
46+
storage = get_cloud_storage(session=session, project_id=current_user.project_id)
4747
document_id = uuid4()
4848

4949
object_store_url = storage.put(src, Path(str(document_id)))
5050

51-
crud = DocumentCrud(session, current_user.id)
51+
crud = DocumentCrud(session, current_user.project_id)
5252
document = Document(
5353
id=document_id,
5454
fname=src.filename,
@@ -58,10 +58,10 @@ def upload_doc(
5858
return APIResponse.success_response(data)
5959

6060

61-
@router.get(
61+
@router.delete(
6262
"/remove/{doc_id}",
6363
description=load_description("documents/delete.md"),
64-
response_model=APIResponse[Document],
64+
response_model=APIResponse[Message],
6565
)
6666
def remove_doc(
6767
session: SessionDep,
@@ -73,18 +73,21 @@ def remove_doc(
7373
)
7474

7575
a_crud = OpenAIAssistantCrud(client)
76-
d_crud = DocumentCrud(session, current_user.id)
76+
d_crud = DocumentCrud(session, current_user.project_id)
7777
c_crud = CollectionCrud(session, current_user.id)
7878

7979
document = d_crud.delete(doc_id)
8080
data = c_crud.delete(document, a_crud)
81-
return APIResponse.success_response(data)
81+
82+
return APIResponse.success_response(
83+
Message(message="Document Deleted Successfully")
84+
)
8285

8386

8487
@router.delete(
8588
"/remove/{doc_id}/permanent",
8689
description=load_description("documents/permanent_delete.md"),
87-
response_model=APIResponse[Document],
90+
response_model=APIResponse[Message],
8891
)
8992
def permanent_delete_doc(
9093
session: SessionDep,
@@ -94,11 +97,10 @@ def permanent_delete_doc(
9497
client = get_openai_client(
9598
session, current_user.organization_id, current_user.project_id
9699
)
97-
98100
a_crud = OpenAIAssistantCrud(client)
99-
d_crud = DocumentCrud(session, current_user.id)
101+
d_crud = DocumentCrud(session, current_user.project_id)
100102
c_crud = CollectionCrud(session, current_user.id)
101-
storage = AmazonCloudStorage(current_user)
103+
storage = get_cloud_storage(session=session, project_id=current_user.project_id)
102104

103105
document = d_crud.read_one(doc_id)
104106

@@ -107,19 +109,30 @@ def permanent_delete_doc(
107109
storage.delete(document.object_store_url)
108110
d_crud.delete(doc_id)
109111

110-
return APIResponse.success_response(document)
112+
return APIResponse.success_response(
113+
Message(message="Document permanently deleted successfully")
114+
)
111115

112116

113117
@router.get(
114118
"/info/{doc_id}",
115119
description=load_description("documents/info.md"),
116-
response_model=APIResponse[Document],
120+
response_model=APIResponse[DocumentPublic],
117121
)
118122
def doc_info(
119123
session: SessionDep,
120-
current_user: CurrentUser,
124+
current_user: CurrentUserOrgProject,
121125
doc_id: UUID = FastPath(description="Document to retrieve"),
126+
include_url: bool = Query(
127+
False, description="Include a signed URL to access the document"
128+
),
122129
):
123-
crud = DocumentCrud(session, current_user.id)
124-
data = crud.read_one(doc_id)
125-
return APIResponse.success_response(data)
130+
crud = DocumentCrud(session, current_user.project_id)
131+
document = crud.read_one(doc_id)
132+
133+
doc_schema = DocumentPublic.model_validate(document, from_attributes=True)
134+
if include_url:
135+
storage = get_cloud_storage(session=session, project_id=current_user.project_id)
136+
doc_schema.signed_url = storage.get_signed_url(document.object_store_url)
137+
138+
return APIResponse.success_response(doc_schema)

backend/app/core/cloud/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
AmazonCloudStorageClient,
44
CloudStorage,
55
CloudStorageError,
6+
get_cloud_storage,
67
)

0 commit comments

Comments
 (0)