Skip to content

Commit 5bd6b18

Browse files
committed
✨ (backend) Add document search view.
Signed-off-by: Fabre Florian <[email protected]>
1 parent f1984af commit 5bd6b18

File tree

9 files changed

+306
-32
lines changed

9 files changed

+306
-32
lines changed

env.d/development/common

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ LOGOUT_REDIRECT_URL=http://localhost:3000
4949
OIDC_REDIRECT_ALLOWED_HOSTS=["http://localhost:8083", "http://localhost:3000"]
5050
OIDC_AUTH_REQUEST_EXTRA_PARAMS={"acr_values": "eidas1"}
5151

52+
# Store OIDC tokens in the session
53+
OIDC_STORE_ACCESS_TOKEN = True # Store the access token in the session
54+
OIDC_STORE_REFRESH_TOKEN = True # Store the encrypted refresh token in the session
55+
OIDC_STORE_REFRESH_TOKEN_KEY = "uoJc422rSQjOXx6QIU5NOXSxeiycT47NrGQIBTWsjFU="
56+
5257
# AI
5358
AI_FEATURE_ENABLED=true
5459
AI_BASE_URL=https://openaiendpoint.com

src/backend/core/api/serializers.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,3 +821,17 @@ class MoveDocumentSerializer(serializers.Serializer):
821821
choices=enums.MoveNodePositionChoices.choices,
822822
default=enums.MoveNodePositionChoices.LAST_CHILD,
823823
)
824+
825+
826+
class FindDocumentSerializer(serializers.Serializer):
827+
"""Serializer for Find search requests"""
828+
829+
q = serializers.CharField(required=True)
830+
831+
def validate_q(self, value):
832+
"""Ensure the `q` search text is not empty or whitespace-only."""
833+
834+
if len(value.strip()) == 0:
835+
raise serializers.ValidationError("Text field cannot be empty.")
836+
837+
return value

src/backend/core/api/viewsets.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from django.db.models.functions import Left, Length
2121
from django.http import Http404, StreamingHttpResponse
2222
from django.urls import reverse
23+
from django.utils.decorators import method_decorator
2324
from django.utils.functional import cached_property
2425
from django.utils.text import capfirst, slugify
2526
from django.utils.translation import gettext_lazy as _
@@ -30,13 +31,15 @@
3031
from csp.constants import NONE
3132
from csp.decorators import csp_update
3233
from lasuite.malware_detection import malware_detection
34+
from lasuite.oidc_login.decorators import refresh_oidc_access_token
3335
from rest_framework import filters, status, viewsets
3436
from rest_framework import response as drf_response
3537
from rest_framework.permissions import AllowAny
3638

3739
from core import authentication, choices, enums, models
3840
from core.services.ai_services import AIService
3941
from core.services.collaboration_services import CollaborationService
42+
from core.services.search_indexers import FindDocumentIndexer
4043
from core.tasks.mail import send_ask_for_access_mail
4144
from core.utils import extract_attachments, filter_descendants
4245

@@ -357,6 +360,7 @@ class DocumentViewSet(
357360
list_serializer_class = serializers.ListDocumentSerializer
358361
trashbin_serializer_class = serializers.ListDocumentSerializer
359362
tree_serializer_class = serializers.ListDocumentSerializer
363+
search_serializer_class = serializers.ListDocumentSerializer
360364

361365
def get_queryset(self):
362366
"""Get queryset performing all annotation and filtering on the document tree structure."""
@@ -987,10 +991,37 @@ def duplicate(self, request, *args, **kwargs):
987991
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
988992
)
989993

990-
# TODO
991-
# @drf.decorators.action(detail=False, methods=["get"])
992-
# def search(self, request, *args, **kwargs):
993-
# index.search()
994+
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
995+
@method_decorator(refresh_oidc_access_token)
996+
def search(self, request, *args, **kwargs):
997+
"""
998+
Returns a DRF response containing the filtered, annotated and ordered document list.
999+
The filtering allows full text search through the opensearch indexation app "find".
1000+
"""
1001+
access_token = request.session.get("oidc_access_token")
1002+
1003+
serializer = serializers.FindDocumentSerializer(data=request.query_params)
1004+
serializer.is_valid(raise_exception=True)
1005+
1006+
indexer = FindDocumentIndexer()
1007+
try:
1008+
queryset = indexer.search(
1009+
text=serializer.validated_data.get("q", ""),
1010+
user=request.user,
1011+
token=access_token,
1012+
)
1013+
except RuntimeError as err:
1014+
return drf.response.Response(
1015+
{"detail": "The service is not configured properly."},
1016+
status=status.HTTP_401_UNAUTHORIZED,
1017+
)
1018+
1019+
return self.get_response_for_queryset(
1020+
queryset,
1021+
context={
1022+
"request": request,
1023+
},
1024+
)
9941025

9951026
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
9961027
def versions_list(self, request, *args, **kwargs):

src/backend/core/services/search_indexers.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,11 @@ def get_visited_document_ids_of(user):
5252
return []
5353

5454
# TODO: exclude links when the user already has a specific access to the doc
55-
qs = models.LinkTrace.objects.filter(
56-
user=user
57-
).exclude(
55+
qs = models.LinkTrace.objects.filter(user=user).exclude(
5856
document__accesses__user=user,
5957
)
6058

61-
return list({
62-
str(id) for id in qs.values_list("document_id", flat=True)
63-
})
59+
return list({str(id) for id in qs.values_list("document_id", flat=True)})
6460

6561

6662
class BaseDocumentIndexer(ABC):
@@ -129,13 +125,14 @@ def search(self, text, user, token):
129125
"""
130126
visited_ids = get_visited_document_ids_of(user)
131127

132-
response = self.search_query(data={
133-
"q": text,
134-
"visited": visited_ids,
135-
"services": ["docs"],
136-
}, token=token)
137-
138-
print(response)
128+
response = self.search_query(
129+
data={
130+
"q": text,
131+
"visited": visited_ids,
132+
"services": ["docs"],
133+
},
134+
token=token,
135+
)
139136

140137
return self.format_response(response)
141138

@@ -207,7 +204,7 @@ def search_query(self, data, token) -> requests.Response:
207204

208205
if not url:
209206
raise RuntimeError(
210-
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
207+
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
211208
)
212209

213210
try:
@@ -228,9 +225,7 @@ def format_response(self, data: dict):
228225
"""
229226
Retrieve document ids from the Find app response and return a queryset.
230227
"""
231-
return models.Document.objects.filter(pk__in=[
232-
d['_id'] for d in data
233-
])
228+
return models.Document.objects.filter(pk__in=[d["_id"] for d in data])
234229

235230
def push(self, data):
236231
"""

src/backend/core/tasks/find.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def _aux():
8686

8787
logger.info(
8888
"Add task for document %s indexation in %.2f seconds",
89-
document.pk, countdown
89+
document.pk,
90+
countdown,
9091
)
9192

9293
# Each time this method is called during the countdown, we increment the

src/backend/core/tests/commands/test_index.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_index():
2121

2222
with transaction.atomic():
2323
doc = factories.DocumentFactory()
24-
empty_doc = factories.DocumentFactory(title=None, content='')
24+
empty_doc = factories.DocumentFactory(title=None, content="")
2525
no_title_doc = factories.DocumentFactory(title=None)
2626

2727
factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -43,7 +43,10 @@ def sortkey(d):
4343
push_call_args = [call.args[0] for call in mock_push.call_args_list]
4444

4545
assert len(push_call_args) == 1 # called once but with a batch of docs
46-
assert sorted(push_call_args[0], key=sortkey) == sorted([
47-
indexer.serialize_document(doc, accesses),
48-
indexer.serialize_document(no_title_doc, accesses),
49-
], key=sortkey)
46+
assert sorted(push_call_args[0], key=sortkey) == sorted(
47+
[
48+
indexer.serialize_document(doc, accesses),
49+
indexer.serialize_document(no_title_doc, accesses),
50+
],
51+
key=sortkey,
52+
)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
"""
2+
Tests for Documents API endpoint in impress's core app: search
3+
"""
4+
5+
import pytest
6+
import responses
7+
from faker import Faker
8+
from rest_framework.test import APIClient
9+
10+
from core import factories, models
11+
12+
fake = Faker()
13+
pytestmark = pytest.mark.django_db
14+
15+
16+
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
17+
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
18+
@responses.activate
19+
def test_api_documents_search_anonymous(reach, role, settings):
20+
"""
21+
Anonymous users should get an empty result list when searching documents,
22+
whatever the link reach and link role.
23+
"""
24+
factories.DocumentFactory(link_reach=reach, link_role=role)
25+
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
26+
27+
factories.DocumentFactory(link_reach=reach, link_role=role)
28+
29+
# Find response
30+
responses.add(
31+
responses.POST,
32+
"http://find/api/v1.0/search",
33+
json=[],
34+
status=200,
35+
)
36+
37+
response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})
38+
39+
assert response.status_code == 200
40+
assert response.json() == {
41+
"count": 0,
42+
"next": None,
43+
"previous": None,
44+
"results": [],
45+
}
46+
47+
48+
def test_api_documents_search_endpoint_is_none(settings):
49+
"""Missing SEARCH_INDEXER_QUERY_URL should throw an error"""
50+
settings.SEARCH_INDEXER_QUERY_URL = None
51+
52+
user = factories.UserFactory()
53+
54+
client = APIClient()
55+
client.force_login(user)
56+
57+
response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})
58+
59+
assert response.status_code == 401
60+
assert response.json() == {"detail": "The service is not configured properly."}
61+
62+
63+
@responses.activate
64+
def test_api_documents_search_invalid_params(settings):
65+
"""A search request without the `q` query parameter should be rejected with a 400."""
66+
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
67+
68+
user = factories.UserFactory()
69+
70+
client = APIClient()
71+
client.force_login(user)
72+
73+
response = APIClient().get("/api/v1.0/documents/search/")
74+
75+
assert response.status_code == 400
76+
assert response.json() == {"q": ["This field is required."]}
77+
78+
79+
@responses.activate
80+
def test_api_documents_search_format(settings):
81+
"""Validate the format of documents as returned by the search view."""
82+
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
83+
84+
user = factories.UserFactory()
85+
86+
client = APIClient()
87+
client.force_login(user)
88+
89+
user_a, user_b, user_c = factories.UserFactory.create_batch(3)
90+
document = factories.DocumentFactory(
91+
title="alpha",
92+
users=(user_a, user_c),
93+
link_traces=(user, user_b),
94+
)
95+
access = factories.UserDocumentAccessFactory(document=document, user=user)
96+
97+
# Find response
98+
responses.add(
99+
responses.POST,
100+
"http://find/api/v1.0/search",
101+
json=[
102+
{"_id": str(document.pk)},
103+
],
104+
status=200,
105+
)
106+
response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})
107+
108+
assert response.status_code == 200
109+
content = response.json()
110+
results = content.pop("results")
111+
assert content == {
112+
"count": 1,
113+
"next": None,
114+
"previous": None,
115+
}
116+
assert len(results) == 1
117+
assert results[0] == {
118+
"id": str(document.id),
119+
"abilities": document.get_abilities(user),
120+
"ancestors_link_reach": None,
121+
"ancestors_link_role": None,
122+
"computed_link_reach": document.computed_link_reach,
123+
"computed_link_role": document.computed_link_role,
124+
"created_at": document.created_at.isoformat().replace("+00:00", "Z"),
125+
"creator": str(document.creator.id),
126+
"depth": 1,
127+
"excerpt": document.excerpt,
128+
"link_reach": document.link_reach,
129+
"link_role": document.link_role,
130+
"nb_accesses_ancestors": 3,
131+
"nb_accesses_direct": 3,
132+
"numchild": 0,
133+
"path": document.path,
134+
"title": document.title,
135+
"updated_at": document.updated_at.isoformat().replace("+00:00", "Z"),
136+
"user_role": access.role,
137+
}

0 commit comments

Comments
 (0)