Skip to content

Commit 8389041

Browse files
committed
fix: optimize paragraph import by eliminating the N+1 query for document max positions
1 parent 8516bed commit 8389041

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

importer/knowledge_import.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,18 @@ def paragraph_import(file_list, source_name, current_page):
157157
document_ids = list(document_paragraphs.keys())
158158
existing_positions = {}
159159

160-
for doc_id in document_ids:
161-
max_position = QuerySet(Paragraph).filter(document=doc_id).aggregate(
162-
max_pos=models.Max('position')
163-
)['max_pos']
164-
existing_positions[doc_id] = max_position or 0
160+
# 优化:使用单次查询获取所有文档的最大position,避免N+1查询问题
161+
if document_ids:
162+
max_positions = (QuerySet(Paragraph)
163+
.filter(document__in=document_ids)
164+
.values('document')
165+
.annotate(max_pos=models.Max('position'))
166+
.values_list('document', 'max_pos'))
167+
168+
# 构建字典,设置默认值为0
169+
existing_positions = {doc_id: 0 for doc_id in document_ids}
170+
for doc_id, max_pos in max_positions:
171+
existing_positions[doc_id] = max_pos or 0
165172

166173
paragraph_model_list = []
167174
for document_id, paragraphs in document_paragraphs.items():

0 commit comments

Comments
 (0)