feat(05-09): PATCH /documents/{id} endpoint + cloud-aware Celery re-analyze
- Add DocumentPatch Pydantic model with filename and folder_id optional fields
- Add PATCH /api/documents/{doc_id} endpoint: ownership guard, uses model_fields_set
  to distinguish an absent folder_id from an explicit null, returns updated metadata dict
- Update _run() in document_tasks.py to use get_storage_backend_for_document
  for non-MinIO backends instead of the hardcoded MinIO path
- Catch CloudConnectionError in the cloud path and return extract_failed status
- Update test to use pure unit mocks (no PostgreSQL) for _run() cloud routing
- All 3 plan tests pass; 23 test_cloud.py tests pass
This commit is contained in:
+50
-24
@@ -638,50 +638,76 @@ async def test_patch_document_wrong_owner(async_client, db_session):
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
async def test_reanalyze_cloud_document_routes_to_cloud_backend(db_session):
|
||||
async def test_reanalyze_cloud_document_routes_to_cloud_backend():
|
||||
"""Re-analyze task calls get_storage_backend_for_document for cloud documents.
|
||||
|
||||
Verifies that doc.storage_backend != 'minio' causes _run() to use the cloud
|
||||
backend path instead of the MinIO path (Plan 09, requirement CLOUD-07).
|
||||
|
||||
Pure unit test — mocks AsyncSessionLocal so no PostgreSQL connection is needed.
|
||||
"""
|
||||
from db.models import Document
|
||||
from tasks.document_tasks import _run
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
auth = await _create_user_and_token(db_session, role="user")
|
||||
|
||||
# Create a nextcloud document
|
||||
doc_id = _uuid.uuid4()
|
||||
doc = Document(
|
||||
id=doc_id,
|
||||
user_id=auth["user"].id,
|
||||
filename="cloud.pdf",
|
||||
content_type="application/pdf",
|
||||
size_bytes=2048,
|
||||
storage_backend="nextcloud",
|
||||
status="uploaded",
|
||||
object_key="nc_file_id_xyz",
|
||||
)
|
||||
db_session.add(doc)
|
||||
await db_session.commit()
|
||||
user_id = _uuid.uuid4()
|
||||
|
||||
# Mock cloud backend: returns file bytes, enabling extraction to proceed
|
||||
# Build a minimal mock Document and User (no DB)
|
||||
mock_doc = MagicMock()
|
||||
mock_doc.id = doc_id
|
||||
mock_doc.user_id = user_id
|
||||
mock_doc.storage_backend = "nextcloud"
|
||||
mock_doc.object_key = "nc_file_id_xyz"
|
||||
mock_doc.content_type = "application/pdf"
|
||||
mock_doc.filename = "cloud.pdf"
|
||||
mock_doc.status = "uploaded"
|
||||
|
||||
mock_user = MagicMock()
|
||||
mock_user.id = user_id
|
||||
mock_user.ai_provider = None
|
||||
mock_user.ai_model = None
|
||||
|
||||
# Mock cloud backend: returns fake bytes so extraction can proceed
|
||||
mock_cloud_backend = AsyncMock()
|
||||
mock_cloud_backend.get_object = AsyncMock(return_value=b"%PDF-1.4 fake content")
|
||||
mock_cloud_backend.get_object = AsyncMock(return_value=b"%PDF-1.4 fake")
|
||||
|
||||
# Mock MinIO backend to verify it is NOT called
|
||||
mock_minio_backend = AsyncMock()
|
||||
mock_minio_backend.get_object = AsyncMock(return_value=b"should not be called")
|
||||
|
||||
with patch("tasks.document_tasks.get_storage_backend_for_document", return_value=mock_cloud_backend) as mock_gsb_doc, \
|
||||
patch("tasks.document_tasks.get_storage_backend", return_value=mock_minio_backend) as mock_gsb:
|
||||
# Mock the DB session returned by AsyncSessionLocal
|
||||
mock_session = AsyncMock()
|
||||
|
||||
async def _fake_get(model, pk):
|
||||
if model.__name__ == "Document":
|
||||
return mock_doc
|
||||
if model.__name__ == "User":
|
||||
return mock_user
|
||||
return None
|
||||
|
||||
mock_session.get = _fake_get
|
||||
|
||||
# AsyncSessionLocal is an async context manager; mock it
|
||||
class _FakeSessionCM:
|
||||
async def __aenter__(self):
|
||||
return mock_session
|
||||
async def __aexit__(self, *args):
|
||||
pass
|
||||
|
||||
# Patch at the storage module level (source of the functions used via deferred import)
|
||||
with patch("db.session.AsyncSessionLocal", return_value=_FakeSessionCM()), \
|
||||
patch("storage.get_storage_backend_for_document", return_value=mock_cloud_backend), \
|
||||
patch("storage.get_storage_backend", return_value=mock_minio_backend), \
|
||||
patch("services.extractor.extract_text_from_bytes", return_value="extracted text"), \
|
||||
patch("services.classifier.classify_document", return_value=["doc"]):
|
||||
result = await _run(str(doc_id))
|
||||
|
||||
# Cloud backend's get_object must have been called
|
||||
# Cloud backend's get_object must have been called with the document's object_key
|
||||
mock_cloud_backend.get_object.assert_called_once_with("nc_file_id_xyz")
|
||||
|
||||
# MinIO backend's get_object must NOT have been called
|
||||
mock_minio_backend.get_object.assert_not_called()
|
||||
|
||||
# Result must not be an error from MinIO path
|
||||
assert result.get("status") != "extract_failed" or "MinIO" not in result.get("error", "")
|
||||
# Result must reflect successful classification, not a MinIO error
|
||||
assert result.get("status") in ("classified", "classification_failed"), \
|
||||
f"Expected classified/classification_failed, got: {result}"
|
||||
|
||||
Reference in New Issue
Block a user