From 55bc87712ca7e133bbb9c4285bb60cbf16d5a0f6 Mon Sep 17 00:00:00 2001 From: Sean McGrath Date: Mon, 9 Feb 2026 15:41:42 +1300 Subject: [PATCH] fix: sanitize uploaded filenames before storage to match LibreChat LibreChat translates spaces and special characters in filenames to underscores, but KCR was storing the original unsanitized name. This caused the model to fail on first file access since the name on disk didn't match what LibreChat reported. --- src/api/files.py | 11 ++++++----- tests/unit/test_api_files.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/api/files.py b/src/api/files.py index 4d20e00..6a01e4e 100644 --- a/src/api/files.py +++ b/src/api/files.py @@ -111,17 +111,18 @@ async def upload_file( # Read file content content = await file.read() - # Store file directly + # Sanitize filename before storage so the name on disk in the + # execution pod matches what LibreChat reports to the model. + sanitized_name = OutputProcessor.sanitize_filename(file.filename) + + # Store file with the sanitized name file_id = await file_service.store_uploaded_file( session_id=session_id, - filename=file.filename, + filename=sanitized_name, content=content, content_type=file.content_type, ) - # Sanitize filename to match what will be used in container - sanitized_name = OutputProcessor.sanitize_filename(file.filename) - uploaded_files.append( { "id": file_id, diff --git a/tests/unit/test_api_files.py b/tests/unit/test_api_files.py index a3f8394..1fbc96e 100644 --- a/tests/unit/test_api_files.py +++ b/tests/unit/test_api_files.py @@ -132,6 +132,42 @@ async def test_upload_single_file(self, mock_file_service, mock_session_service, mock_file_service.store_uploaded_file.assert_called_once() mock_session_service.create_session.assert_called_once() + @pytest.mark.asyncio + @pytest.mark.parametrize( + "original, expected", + [ + ("my file.csv", "my_file.csv"), + ("report (final).xlsx", "report__final_.xlsx"), + ("data&summary#2.txt", "data_summary_2.txt"), + ("résumé (v2).pdf", "r_sum___v2_.pdf"), + ("hello world!@#$.csv", "hello_world____.csv"), + ], + ) + async def test_upload_sanitizes_filename_before_storage( + self, mock_file_service, mock_session_service, original, expected + ): + """Test that filenames with special characters are sanitized before storing.""" + file = MagicMock(spec=UploadFile) + file.filename = original + file.content_type = "text/csv" + file.size = 100 + file.read = AsyncMock(return_value=b"a,b,c") + + result = await upload_file( + file=file, + files=None, + entity_id=None, + file_service=mock_file_service, + session_service=mock_session_service, + ) + + # The stored filename should be sanitized + call_kwargs = mock_file_service.store_uploaded_file.call_args + assert call_kwargs.kwargs["filename"] == expected + + # The response should also use the sanitized name + assert result["files"][0]["filename"] == expected + @pytest.mark.asyncio async def test_upload_multiple_files(self, mock_file_service, mock_session_service, mock_upload_file): """Test uploading multiple files."""