""" Tests for the /documents endpoints. Synthetic (minimal_pdf / invoice_pdf) tests run always. Live tests that use real PDFs from tests/pdfs/ are skipped when that directory is empty — drop any PDF there to activate them. """ import io import json from pathlib import Path from unittest.mock import patch import pytest pytestmark = pytest.mark.asyncio # ── Helpers ──────────────────────────────────────────────────────────────────── def _pdf_upload(filename: str, data: bytes): return {"file": (filename, io.BytesIO(data), "application/pdf")} # ── List / empty state ───────────────────────────────────────────────────────── async def test_list_documents_empty(client): r = await client.get("/documents") assert r.status_code == 200 assert r.json() == [] # ── Upload ───────────────────────────────────────────────────────────────────── async def test_upload_returns_202(client, minimal_pdf): with patch("app.routers.documents.process_document"): r = await client.post("/documents/upload", files=_pdf_upload("test.pdf", minimal_pdf)) assert r.status_code == 202 data = r.json() assert data["filename"] == "test.pdf" assert data["status"] == "pending" assert "id" in data async def test_upload_non_pdf_rejected(client): r = await client.post( "/documents/upload", files={"file": ("note.txt", io.BytesIO(b"hello"), "text/plain")}, ) assert r.status_code == 415 async def test_upload_file_too_large(client): big = b"%PDF-1.4\n" + b"x" * (21 * 1024 * 1024) with patch("app.routers.documents.process_document"): r = await client.post("/documents/upload", files=_pdf_upload("big.pdf", big)) assert r.status_code == 413 # ── Get / status ─────────────────────────────────────────────────────────────── async def test_get_document(client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("get.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await client.get(f"/documents/{doc_id}") assert r.status_code == 200 assert r.json()["id"] == doc_id async def test_get_document_not_found(client): r = await client.get("/documents/nonexistent-id") assert r.status_code == 404 async def test_get_document_status(client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("status.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await client.get(f"/documents/{doc_id}/status") assert r.status_code == 200 assert r.json()["status"] == "pending" async def test_other_user_cannot_see_document(client, other_client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("private.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await other_client.get(f"/documents/{doc_id}") assert r.status_code == 404 # ── Patch document type ──────────────────────────────────────────────────────── async def test_patch_document_type(client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("patch.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await client.patch(f"/documents/{doc_id}/type", json={"document_type": "receipt"}) assert r.status_code == 200 assert r.json()["document_type"] == "receipt" # ── Delete ───────────────────────────────────────────────────────────────────── async def test_delete_document(client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("del.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await client.delete(f"/documents/{doc_id}") assert r.status_code == 204 r2 = await client.get(f"/documents/{doc_id}") assert r2.status_code == 404 async def test_delete_document_not_found(client): r = await client.delete("/documents/nonexistent-id") assert r.status_code == 404 async def test_other_user_cannot_delete_document(client, other_client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("owned.pdf", minimal_pdf)) doc_id = up.json()["id"] r = await other_client.delete(f"/documents/{doc_id}") assert r.status_code == 404 # Original owner can still get it r2 = await client.get(f"/documents/{doc_id}") assert r2.status_code == 200 # ── Category assignment ──────────────────────────────────────────────────────── async def test_assign_and_remove_category(client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("cat.pdf", minimal_pdf)) doc_id = up.json()["id"] cat = await client.post("/categories", json={"name": "Ops"}) cat_id = cat.json()["id"] r = await client.post(f"/documents/{doc_id}/categories/{cat_id}") assert r.status_code == 204 doc = await client.get(f"/documents/{doc_id}") cat_names = [c["name"] for c in doc.json()["categories"]] assert "Ops" in cat_names r2 = await client.delete(f"/documents/{doc_id}/categories/{cat_id}") assert r2.status_code == 204 doc2 = await client.get(f"/documents/{doc_id}") assert doc2.json()["categories"] == [] async def test_assign_category_idempotent(client, minimal_pdf): """Assigning the same category twice should not error.""" with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("idem.pdf", minimal_pdf)) doc_id = up.json()["id"] cat_id = (await client.post("/categories", json={"name": "Idem"})).json()["id"] await client.post(f"/documents/{doc_id}/categories/{cat_id}") r = await client.post(f"/documents/{doc_id}/categories/{cat_id}") assert r.status_code == 204 # no error on duplicate async def test_cannot_assign_other_users_category(client, other_client, minimal_pdf): with patch("app.routers.documents.process_document"): up = await client.post("/documents/upload", files=_pdf_upload("x.pdf", minimal_pdf)) doc_id = up.json()["id"] # other_client creates a category other_cat = await other_client.post("/categories", json={"name": "Foreign"}) other_cat_id = other_cat.json()["id"] # original user tries to assign it r = await client.post(f"/documents/{doc_id}/categories/{other_cat_id}") assert r.status_code == 404 # ── AI processing integration (with mock AI) ────────────────────────────────── async def test_processing_sets_extracted_data(client, invoice_pdf, mock_ai_service): """Upload + wait for background processing; verify extracted_data is populated.""" r = await client.post("/documents/upload", files=_pdf_upload("invoice.pdf", invoice_pdf)) assert r.status_code == 202 doc_id = r.json()["id"] # Background tasks run synchronously in test context once response is sent. # Poll the status endpoint briefly. import asyncio for _ in range(20): status_r = await client.get(f"/documents/{doc_id}/status") if status_r.json()["status"] in ("done", "failed"): break await asyncio.sleep(0.1) doc_r = await client.get(f"/documents/{doc_id}") doc = doc_r.json() assert doc["status"] == "done" assert doc["document_type"] == "invoice" assert doc["extracted_data"] is not None extracted = json.loads(doc["extracted_data"]) assert extracted["vendor_name"] == "ACME Corp" assert "suggested_categories" in extracted assert isinstance(extracted["suggested_categories"], list) assert len(extracted["suggested_categories"]) > 0 # ── Graceful degradation when ai-service is unavailable ────────────────────── async def test_processing_fails_gracefully_when_ai_service_502(client, invoice_pdf): """When ai-service returns an error, document status should be 'failed', not crash.""" from app.services.ai_client import AIServiceError with patch( "app.services.ai_client.classify_document", side_effect=AIServiceError("ai-service returned 502"), ): r = await client.post("/documents/upload", files=_pdf_upload("fail.pdf", invoice_pdf)) assert r.status_code == 202 doc_id = r.json()["id"] import asyncio for _ in range(20): status_r = await client.get(f"/documents/{doc_id}/status") if status_r.json()["status"] in ("done", "failed"): break await asyncio.sleep(0.1) doc = (await client.get(f"/documents/{doc_id}")).json() assert doc["status"] == "failed" assert "ai-service" in (doc.get("error_message") or "").lower() # ── Live tests (require real PDFs in tests/pdfs/) ───────────────────────────── async def test_live_upload_real_pdf(client, real_pdfs, mock_ai_service): """Upload each real PDF from tests/pdfs/ and verify it reaches 'done'.""" import asyncio for pdf_path in real_pdfs: data = pdf_path.read_bytes() r = await client.post( "/documents/upload", files=_pdf_upload(pdf_path.name, data), ) assert r.status_code == 202, f"Upload failed for {pdf_path.name}: {r.text}" doc_id = r.json()["id"] for _ in range(30): status_r = await client.get(f"/documents/{doc_id}/status") if status_r.json()["status"] in ("done", "failed"): break await asyncio.sleep(0.2) final = await client.get(f"/documents/{doc_id}") assert final.json()["status"] == "done", ( f"{pdf_path.name} ended with status '{final.json()['status']}': " f"{final.json().get('error_message')}" )