""" Document API tests — async only (Plan 05 cutover). Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05. All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite). """ from __future__ import annotations import re import pytest async def test_upload_txt_no_classify(async_client, sample_txt): with open(sample_txt, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() assert data["original_name"] == "sample.txt" assert "extracted_text" in data assert "invoices" in data["extracted_text"].lower() or len(data["extracted_text"]) > 0 assert data["topics"] == [] assert "id" in data async def test_upload_pdf_no_classify(async_client, sample_pdf): with open(sample_pdf, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.pdf", f, "application/pdf")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() assert data["mime_type"] == "application/pdf" assert len(data["extracted_text"]) > 0 async def test_list_documents(async_client, sample_txt): with open(sample_txt, "rb") as f: await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) resp = await async_client.get("/api/documents") assert resp.status_code == 200 data = resp.json() assert data["total"] == 1 assert len(data["items"]) == 1 async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() # Wire a topic via the storage service directly (replaces old flat-file call) from services import storage await storage.update_document_topics(db_session, upload["id"], ["finance"]) resp = await async_client.get("/api/documents?topic=finance") assert resp.json()["total"] == 1 resp2 = await async_client.get("/api/documents?topic=legal") assert resp2.json()["total"] == 0 async def test_get_document(async_client, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() resp = await async_client.get(f"/api/documents/{upload['id']}") assert resp.status_code == 200 assert resp.json()["id"] == upload["id"] async def test_get_document_not_found(async_client): resp = await async_client.get("/api/documents/nonexistent") assert resp.status_code == 404 async def test_delete_document(async_client, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() resp = await async_client.delete(f"/api/documents/{upload['id']}") assert resp.status_code == 200 assert resp.json()["success"] is True resp2 = await async_client.get(f"/api/documents/{upload['id']}") assert resp2.status_code == 404 async def test_delete_document_not_found(async_client): resp = await async_client.delete("/api/documents/nonexistent") assert resp.status_code == 404 async def test_upload_empty_file(async_client): resp = await async_client.post( "/api/documents/upload", files={"file": ("empty.txt", b"", "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 400 async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt): """After a successful upload, document is persisted and queryable via GET (STORE-01, STORE-02).""" with open(sample_txt, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() # Response must include a UUID-format id uuid_pattern = re.compile( r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" ) assert "id" in data, "Upload response missing 'id'" assert uuid_pattern.match(data["id"]), f"id '{data['id']}' is not a UUID" # Metadata round-trips via GET doc_id = data["id"] get_resp = await async_client.get(f"/api/documents/{doc_id}") assert get_resp.status_code == 200 get_data = get_resp.json() assert get_data["original_name"] == "sample.txt"