feat(05-09): PATCH /documents/{id} endpoint + cloud-aware Celery re-analyze

- Add DocumentPatch Pydantic model with optional filename and folder_id fields
- Add PATCH /api/documents/{doc_id} endpoint: enforces an ownership guard, uses
  model_fields_set to distinguish an absent folder_id from an explicit null,
  and returns the updated metadata dict
- Update _run() in document_tasks.py to use get_storage_backend_for_document
  for non-MinIO backends instead of the hardcoded MinIO path
- Catch CloudConnectionError in the cloud path and return an extract_failed status
- Update test to use pure unit mocks (no PostgreSQL) for _run() cloud routing
- All 3 plan tests pass; 23 test_cloud.py tests pass
This commit is contained in:
curo1305
2026-05-30 11:16:01 +02:00
parent 9bc056100c
commit 6d094d17f0
3 changed files with 143 additions and 29 deletions
+50 -24
View File
@@ -638,50 +638,76 @@ async def test_patch_document_wrong_owner(async_client, db_session):
assert resp.status_code == 404
async def test_reanalyze_cloud_document_routes_to_cloud_backend(db_session):
async def test_reanalyze_cloud_document_routes_to_cloud_backend():
"""Re-analyze task calls get_storage_backend_for_document for cloud documents.
Verifies that doc.storage_backend != 'minio' causes _run() to use the cloud
backend path instead of the MinIO path (Plan 09, requirement CLOUD-07).
Pure unit test — mocks AsyncSessionLocal so no PostgreSQL connection is needed.
"""
from db.models import Document
from tasks.document_tasks import _run
from unittest.mock import AsyncMock, patch, MagicMock
auth = await _create_user_and_token(db_session, role="user")
# Create a nextcloud document
doc_id = _uuid.uuid4()
doc = Document(
id=doc_id,
user_id=auth["user"].id,
filename="cloud.pdf",
content_type="application/pdf",
size_bytes=2048,
storage_backend="nextcloud",
status="uploaded",
object_key="nc_file_id_xyz",
)
db_session.add(doc)
await db_session.commit()
user_id = _uuid.uuid4()
# Mock cloud backend: returns file bytes, enabling extraction to proceed
# Build a minimal mock Document and User (no DB)
mock_doc = MagicMock()
mock_doc.id = doc_id
mock_doc.user_id = user_id
mock_doc.storage_backend = "nextcloud"
mock_doc.object_key = "nc_file_id_xyz"
mock_doc.content_type = "application/pdf"
mock_doc.filename = "cloud.pdf"
mock_doc.status = "uploaded"
mock_user = MagicMock()
mock_user.id = user_id
mock_user.ai_provider = None
mock_user.ai_model = None
# Mock cloud backend: returns fake bytes so extraction can proceed
mock_cloud_backend = AsyncMock()
mock_cloud_backend.get_object = AsyncMock(return_value=b"%PDF-1.4 fake content")
mock_cloud_backend.get_object = AsyncMock(return_value=b"%PDF-1.4 fake")
# Mock MinIO backend to verify it is NOT called
mock_minio_backend = AsyncMock()
mock_minio_backend.get_object = AsyncMock(return_value=b"should not be called")
with patch("tasks.document_tasks.get_storage_backend_for_document", return_value=mock_cloud_backend) as mock_gsb_doc, \
patch("tasks.document_tasks.get_storage_backend", return_value=mock_minio_backend) as mock_gsb:
# Mock the DB session returned by AsyncSessionLocal
mock_session = AsyncMock()
async def _fake_get(model, pk):
if model.__name__ == "Document":
return mock_doc
if model.__name__ == "User":
return mock_user
return None
mock_session.get = _fake_get
# AsyncSessionLocal is an async context manager; mock it
class _FakeSessionCM:
async def __aenter__(self):
return mock_session
async def __aexit__(self, *args):
pass
# Patch at the storage module level (source of the functions used via deferred import)
with patch("db.session.AsyncSessionLocal", return_value=_FakeSessionCM()), \
patch("storage.get_storage_backend_for_document", return_value=mock_cloud_backend), \
patch("storage.get_storage_backend", return_value=mock_minio_backend), \
patch("services.extractor.extract_text_from_bytes", return_value="extracted text"), \
patch("services.classifier.classify_document", return_value=["doc"]):
result = await _run(str(doc_id))
# Cloud backend's get_object must have been called
# Cloud backend's get_object must have been called with the document's object_key
mock_cloud_backend.get_object.assert_called_once_with("nc_file_id_xyz")
# MinIO backend's get_object must NOT have been called
mock_minio_backend.get_object.assert_not_called()
# Result must not be an error from MinIO path
assert result.get("status") != "extract_failed" or "MinIO" not in result.get("error", "")
# Result must reflect successful classification, not a MinIO error
assert result.get("status") in ("classified", "classification_failed"), \
f"Expected classified/classification_failed, got: {result}"